def predict(self, img, scale, flip=(False, False)):
        self.extractor.eval()
        self.rpn.eval()
        self.roi_head.eval()

        n = img.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = img.shape
        img_size = (H, W)

        # ------------------ Prediction -------------------#
        with torch.no_grad():
            scale = utils.totensor(scale)
            img = utils.totensor(img)
            features = self.extractor(img)
            rpn_loc, rpn_score, roi, _ = self.rpn(features,
                                                  img_size,
                                                  scale,
                                                  training=False)
            roi_cls_loc, roi_cls_score = self.roi_head(features, roi)

            n_roi = roi.shape[0]
            roi_cls_loc = roi_cls_loc.view(n_roi, self.n_class, 4)
            roi = utils.totensor(
                dataset_utils.bbox_inverse(roi, (H, W), flip, scale))

            prob = F.softmax(roi_cls_score, dim=1)  # shape:(n_roi,21)
            label = torch.max(prob, dim=1)[1]  # shape:(n_roi,)
            index = utils.totensor(np.arange(0, n_roi)).long()
            roi_cls_loc = roi_cls_loc[index, label, :]
            label = utils.tonumpy(label).astype(int)

            cls_bbox = utils.loc2bbox(utils.tonumpy(roi),
                                      utils.tonumpy(roi_cls_loc))
            # clip bounding box
            cls_bbox[:, 0::2] = np.clip(cls_bbox[:, 0::2], 0, H)
            cls_bbox[:, 1::2] = np.clip(cls_bbox[:, 1::2], 0, W)

            # keep only foreground (non-background) detections
            foreground_mask = np.where(label != 0)[0]
            cls_bbox = cls_bbox[foreground_mask]
            label = label[foreground_mask]

        return cls_bbox, label
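
For reference, `utils.loc2bbox` above decodes `(dy, dx, dh, dw)` offsets back into `(y_min, x_min, y_max, x_max)` boxes. A minimal NumPy sketch, assuming the standard Faster R-CNN parameterization is what `utils.loc2bbox` implements:

import numpy as np

def loc2bbox(src_bbox, loc):
    # src_bbox: (N, 4) as (y_min, x_min, y_max, x_max); loc: (N, 4) as (dy, dx, dh, dw)
    src_h = src_bbox[:, 2] - src_bbox[:, 0]
    src_w = src_bbox[:, 3] - src_bbox[:, 1]
    src_ctr_y = src_bbox[:, 0] + 0.5 * src_h
    src_ctr_x = src_bbox[:, 1] + 0.5 * src_w

    # shift the center by a fraction of the box size, scale by exp of (dh, dw)
    ctr_y = loc[:, 0] * src_h + src_ctr_y
    ctr_x = loc[:, 1] * src_w + src_ctr_x
    h = np.exp(loc[:, 2]) * src_h
    w = np.exp(loc[:, 3]) * src_w

    dst_bbox = np.zeros_like(loc)
    dst_bbox[:, 0] = ctr_y - 0.5 * h
    dst_bbox[:, 1] = ctr_x - 0.5 * w
    dst_bbox[:, 2] = ctr_y + 0.5 * h
    dst_bbox[:, 3] = ctr_x + 0.5 * w
    return dst_bbox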
Example 2
    def forward(self, x, rois=None, im_indices=None, k=0,
                in_layer='conv1', out_layer='capsule', fea_view=False):
        if rois is not None:
            im_indices = totensor(im_indices).float()
            rois = totensor(rois).float()
            indices_and_rois = torch.cat([im_indices[:, None], rois], dim=1)
        conv3_fea = []
        run = False
        for name, module in self.layers.named_children():
            if name == in_layer:
                run = True
            if run:
                x = module(x)
                if name == 'conv3':
                    conv3_fea = x
                    x = self.roi(x, indices_and_rois)  # [len(im_indices), 512, 7, 7]
                if name == out_layer:
                    if fea_view and name == 'conv3':
                        x = x.view(len(im_indices), -1)
                    return x
        if not self.all_fc:
            x = self.primary(x)
            cap_out = self.branches[k](x)
        else:
            # x = squash(x, dim=1)  # [-, 512, 7, 7]
            x = x.view(x.shape[0], -1)
            cap_out = self.cap_fc(x).unsqueeze(2).unsqueeze(3)  # [batch_size, 2, 1, 1]
        if out_layer == 'capsule':
            x = torch.sqrt((cap_out ** 2).sum(dim=2)).view(-1, self.num_predictions)  # [len(im_indices), 2]
            return x, conv3_fea
    def train_roi_head(self, roi, features, bbox, label):

        # ------------------ ROI Network -------------------#
        # ------------------ ROI target assignment -------------------#
        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        # Sample positive/negative RoIs for training the ROIHead (R-CNN) classifier.
        # gt_roi_loc: location offsets; this is the second round of box regression
        # gt_roi_label: N+1 classes, with an extra background class (object or not)
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi,
            utils.tonumpy(bbox),
            utils.tonumpy(label),
        )
        # NOTE: the sampled RoI batch index is all zero because only batch size 1
        # is supported

        # ------------------ ROI prediction -------------------#
        # Predictions are only needed for the sampled RoIs, so the head runs after
        # target assignment, yielding class scores roi_cls_score and location
        # offsets roi_cls_loc for sample_roi.
        roi_cls_loc, roi_cls_score = self.roi_head(features, sample_roi)

        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1,
                                       4)  # [n_sample, n_class+1, 4]
        # roi_cls_loc holds a coordinate prediction for every class, but the loss
        # only needs the one for the ground-truth class:
        # roi_loc is the location prediction for the ground-truth class.
        gt_roi_loc = utils.totensor(gt_roi_loc)
        gt_roi_label = utils.totensor(gt_roi_label).long()
        index = utils.totensor(np.arange(0, n_sample)).long()
        roi_loc = roi_cls_loc[index,
                              gt_roi_label].contiguous()  # [num_sample,4]

        # roi loc loss (bounding-box regression loss)
        roi_loc_loss = self._roi_loc_loss(roi_loc, gt_roi_loc)

        # roi cls loss (classification loss over the 21 classes)
        roi_cls_loss = F.cross_entropy(roi_cls_score, gt_roi_label)

        roi_loss = roi_loc_loss + roi_cls_loss

        self.roi_optimizer.zero_grad()
        roi_loss.backward()
        self.roi_optimizer.step()

        return roi_loc_loss, roi_cls_loss
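
The `proposal_target_creator` used above is not shown in this listing. A compressed sketch of the usual Faster R-CNN sampling recipe, assuming illustrative thresholds and two assumed helpers, `compute_iou` and `bbox2loc`:

import numpy as np

def sample_rois(roi, bbox, label, n_sample=128, pos_ratio=0.25,
                pos_iou_thresh=0.5, neg_iou_thresh=0.5):
    # compute_iou is an assumed helper returning an (n_roi, n_gt) IoU matrix
    iou = compute_iou(roi, bbox)
    gt_assignment = iou.argmax(axis=1)
    max_iou = iou.max(axis=1)
    gt_roi_label = label[gt_assignment] + 1  # 0 is reserved for background

    pos_index = np.where(max_iou >= pos_iou_thresh)[0]
    n_pos = int(min(n_sample * pos_ratio, pos_index.size))
    pos_index = np.random.choice(pos_index, size=n_pos, replace=False)

    neg_index = np.where(max_iou < neg_iou_thresh)[0]
    n_neg = min(n_sample - n_pos, neg_index.size)
    neg_index = np.random.choice(neg_index, size=n_neg, replace=False)

    keep = np.append(pos_index, neg_index)
    gt_roi_label = gt_roi_label[keep]
    gt_roi_label[n_pos:] = 0  # negatives become background
    sample_roi = roi[keep]
    # bbox2loc (assumed) encodes regression targets against the assigned gt boxes
    gt_roi_loc = bbox2loc(sample_roi, bbox[gt_assignment[keep]])
    return sample_roi, gt_roi_loc, gt_roi_label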
Example 4
    def __getitem__(self, index):
        gt_path = self.data_info['gt_path'][index]
        lq_path_list = self.data_info['lq_path'][index]

        # get the GT frame (the 4th frame)
        img_bytes = self.file_client.get(gt_path)
        img_gt = _bytes2img(img_bytes)  # (H W [BGR])

        # get the 7 LQ frames
        img_lqs = []
        for lq_path in lq_path_list:
            img_bytes = self.file_client.get(lq_path)
            img_lq = _bytes2img(img_bytes)  # (H W [BGR])
            img_lqs.append(img_lq)

        # to tensor
        img_lqs.append(img_gt)
        img_results = totensor(img_lqs)
        img_lqs = torch.stack(img_results[0:-1], dim=0)
        img_gt = img_results[-1]

        return {
            'lq': img_lqs,  # (T [RGB] H W)
            'gt': img_gt,  # ([RGB] H W)
        }
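
`_bytes2img` is not defined in this listing. A minimal sketch, assuming a BasicSR-style decoder from raw encoded bytes to a float32 HWC array in [0, 1]:

import cv2
import numpy as np

def _bytes2img(img_bytes):
    # decode encoded image bytes into an (H, W, C) float32 array in [0, 1]
    img_np = np.frombuffer(img_bytes, np.uint8)
    img = cv2.imdecode(img_np, cv2.IMREAD_UNCHANGED).astype(np.float32) / 255.
    if img.ndim == 2:
        img = np.expand_dims(img, 2)  # keep a channel axis for grayscale
    return img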
    def _roi_loc_loss(
        self,
        pred_loc,
        gt_loc,
    ):
        in_weight = torch.ones(gt_loc.shape)
        in_weight = utils.totensor(in_weight)
        loc_loss = self._smooth_l1_loss(pred_loc, gt_loc, in_weight)
        loc_loss /= gt_loc.shape[0]
        return loc_loss
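
`_smooth_l1_loss`, shared by this loss and `_rpn_loc_loss` further down, is not shown. A sketch of the common Faster R-CNN form with per-element weights and the sigma trick, assuming that is what the method implements:

def _smooth_l1_loss(self, pred_loc, gt_loc, in_weight, sigma=1.0):
    sigma2 = sigma ** 2
    diff = in_weight * (pred_loc - gt_loc)
    abs_diff = diff.abs()
    flag = (abs_diff < 1.0 / sigma2).float()
    # quadratic inside the |x| < 1/sigma^2 region, linear outside
    y = flag * (sigma2 / 2.0) * diff ** 2 + (1 - flag) * (abs_diff - 0.5 / sigma2)
    return y.sum()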
Example 6
    def __getitem__(self, index):
        if self.file_client is None:
            self.file_client = FileClient(
                self.io_opts_dict.pop('type'), **self.io_opts_dict
            )
        # random reverse
        if self.opts_dict['random_reverse'] and random.random() < 0.5:
            self.neighbor_list.reverse()

        # ==========
        # get frames
        # ==========

        # get the GT frame (im4.png)
        gt_size = self.opts_dict['gt_size']
        key = self.keys[index]
        clip, seq, _ = key.split('/')  # key example: 00001/0001/im1.png

        img_gt_path = key
        img_bytes = self.file_client.get(img_gt_path, 'gt')
        img_gt = _bytes2img(img_bytes)  # (H W 1)

        # get the neighboring LQ frames
        img_lqs = []
        for neighbor in self.neighbor_list:
            img_lq_path = f'{clip}/{seq}/im{neighbor}.png'
            img_bytes = self.file_client.get(img_lq_path, 'lq')
            img_lq = _bytes2img(img_bytes)  # (H W 1)
            img_lqs.append(img_lq)

        # ==========
        # data augmentation
        # ==========
        
        # randomly crop
        img_gt, img_lqs = paired_random_crop(
            img_gt, img_lqs, gt_size, img_gt_path
            )

        # flip, rotate
        img_lqs.append(img_gt)  # gt joint augmentation with lq
        img_results = augment(
            img_lqs, self.opts_dict['use_flip'], self.opts_dict['use_rot']
            )

        # to tensor
        img_results = totensor(img_results)
        img_lqs = torch.stack(img_results[0:-1], dim=0)
        img_gt = img_results[-1]

        return {
            'lq': img_lqs,  # (T 1 H W)
            'gt': img_gt,  # (1 H W)
            }
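
The `totensor` used by these dataset classes converts a list of HWC NumPy images into CHW float tensors. A minimal sketch, assuming BGR input and the RGB output order the return-value comments suggest (a BasicSR-style `img2tensor` is the likely original):

import cv2
import torch

def totensor(imgs):
    # list of (H, W, C) float32 BGR arrays -> list of (C, H, W) float tensors in RGB
    def _one(img):
        if img.shape[2] == 3:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        return torch.from_numpy(img.transpose(2, 0, 1)).float()
    return [_one(img) for img in imgs]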
    def _rpn_cls_loss(self, pred_label, gt_label):
        # samples with label=-1 are ignored (weight 0); only label >= 0 contributes
        in_weight = (gt_label >= 0).float()
        in_weight = utils.totensor(in_weight)
        cls_loss = F.binary_cross_entropy(pred_label,
                                          gt_label.float(),
                                          in_weight,
                                          reduction='sum')
        cls_loss /= (gt_label >= 0).sum().float()
        return cls_loss
    def _rpn_loc_loss(self, pred_loc, gt_loc, gt_label):
        # Localization loss is calculated only for positive rois.
        # NOTE: unlike the original implementation, we don't need inside_weight
        # and outside_weight; they can be computed from gt_label.
        # Only positive samples contribute to the loc loss; negative and ignored
        # samples get in_weight = 0.
        in_weight = torch.zeros(gt_loc.shape)
        in_weight[gt_label > 0] = 1
        in_weight = utils.totensor(in_weight)
        loc_loss = self._smooth_l1_loss(pred_loc, gt_loc, in_weight)
        loc_loss /= ((gt_label > 0).sum().float() + 1)  # mean; +1 guards against NaN
        return loc_loss
    def predict(self, img, scale):
        n = img.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = img.shape
        img_size = (H, W)

        # ------------------ Prediction -------------------#
        with torch.no_grad():
            scale = utils.totensor(scale)
            img = utils.totensor(img)
            features = self.extractor(img)
            rpn_loc, rpn_score, roi, _ = self.rpn(features, img_size, scale)
            roi_cls_loc, roi_cls_score = self.roi_head(features, roi)

            n_roi = roi.shape[0]
            roi_cls_score = roi_cls_score.data
            roi_cls_loc = roi_cls_loc.data.view(n_roi, self.n_class, 4)
            roi = utils.totensor(roi) / scale
            mean = utils.totensor(self.loc_normalize_mean)
            std = utils.totensor(self.loc_normalize_std)
            roi_cls_loc = (roi_cls_loc * std + mean)

            roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
            cls_bbox = utils.loc2bbox(
                utils.tonumpy(roi).reshape((-1, 4)),
                utils.tonumpy(roi_cls_loc).reshape((-1, 4)))
            cls_bbox = utils.totensor(cls_bbox)
            cls_bbox = cls_bbox.view(-1, self.n_class, 4)
            # clip bounding box
            cls_bbox[:, :, 0::2] = (cls_bbox[:, :, 0::2]).clamp(min=0, max=H)
            cls_bbox[:, :, 1::2] = (cls_bbox[:, :, 1::2]).clamp(min=0, max=W)

            prob = F.softmax(utils.totensor(roi_cls_score),
                             dim=1)  # shape:(n_roi,21)
            label = torch.max(prob, dim=1)[1].data  # shape:(n_roi,)
            # indices of non-background predictions
            mask_label = np.where(label.cpu().numpy() != 0)[0]
            # gather, for each RoI, the box predicted for its argmax class
            bbox = torch.gather(cls_bbox, 1,
                                label.view(-1, 1).unsqueeze(2).repeat(
                                    1, 1, 4)).squeeze(1)

            # drop background detections
            label = label.cpu().numpy()[mask_label]
            bbox = bbox.cpu().numpy()[mask_label]

        return bbox, label
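
Note that neither `predict` variant applies non-maximum suppression, so overlapping detections of the same object survive. A minimal greedy NMS pass that could be run per class before returning, assuming `(y1, x1, y2, x2)` NumPy boxes and per-detection scores (e.g. the max of `prob`) are available:

import numpy as np

def nms(bboxes, scores, thresh=0.3):
    # greedy NMS on (y1, x1, y2, x2) boxes; returns indices of kept boxes
    y1, x1, y2, x2 = bboxes[:, 0], bboxes[:, 1], bboxes[:, 2], bboxes[:, 3]
    areas = (y2 - y1) * (x2 - x1)
    order = scores.argsort()[::-1]  # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        inter = np.maximum(0., yy2 - yy1) * np.maximum(0., xx2 - xx1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]  # drop boxes overlapping the kept one
    return keep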
Example 10
    def __getitem__(self, index):
        # get gt frame
        img = import_yuv(
            seq_path=self.data_info['gt_path'][index],
            h=self.data_info['h'][index],
            w=self.data_info['w'][index],
            tot_frm=1,
            start_frm=self.data_info['gt_index'][index],
            only_y=True
            )
        img_gt = np.expand_dims(
            np.squeeze(img), 2
            ).astype(np.float32) / 255.  # (H W 1)

        # get lq frames
        img_lqs = []
        for lq_index in self.data_info['lq_indexes'][index]:
            img = import_yuv(
                seq_path=self.data_info['lq_path'][index],
                h=self.data_info['h'][index],
                w=self.data_info['w'][index],
                tot_frm=1,
                start_frm=lq_index,
                only_y=True
                )
            img_lq = np.expand_dims(
                np.squeeze(img), 2
                ).astype(np.float32) / 255.  # (H W 1)
            img_lqs.append(img_lq)

        # no augmentation

        # to tensor
        img_lqs.append(img_gt)
        img_results = totensor(img_lqs)
        img_lqs = torch.stack(img_results[0:-1], dim=0)
        img_gt = img_results[-1]

        return {
            'lq': img_lqs,  # (T 1 H W)
            'gt': img_gt,  # (1 H W)
            'name_vid': self.data_info['name_vid'][index], 
            'index_vid': self.data_info['index_vid'][index], 
            }
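
`import_yuv` reads frames from a raw planar YUV file. A sketch of the Y-only path used here, assuming 8-bit YUV 4:2:0 layout (1.5 bytes per pixel per frame):

import numpy as np

def import_yuv(seq_path, h, w, tot_frm, start_frm=0, only_y=True):
    frame_size = h * w * 3 // 2  # bytes per YUV420p frame
    y = np.zeros((tot_frm, h, w), np.uint8)
    with open(seq_path, 'rb') as f:
        for i in range(tot_frm):
            # seek to the start of the frame; the Y plane is the first h*w bytes
            f.seek((start_frm + i) * frame_size)
            y[i] = np.frombuffer(f.read(h * w), np.uint8).reshape(h, w)
    if only_y:
        return y
    raise NotImplementedError  # U/V planes omitted in this sketch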
    def train_step(self, img, bbox, label, scale):
        n = bbox.shape[0]
        if n != 1:
            raise ValueError('Currently only batch size 1 is supported.')

        _, _, H, W = img.shape
        img_size = (H, W)

        # extractor here is the first 10 layers of VGG16; it extracts the feature map
        img = utils.totensor(img)
        features = self.extractor(img)

        # ------------------ RPN Network -------------------#
        # ------------------ RPN prediction -------------------#
        # Extract RoIs with the RPN network.
        # rpn_locs: regression offsets for every anchor, [1,9*hh*ww,4]
        # rpn_scores: binary (object or not) scores for every anchor, [1,9*hh*ww,2]
        # rois: candidate regions from the RPN, about 2000 during training, [2000,4]
        # roi_indices: all zeros, with the same length as rois; it is the index
        #   within the batch, and is all zero because only batch size 1 is supported
        # rpn_locs and rpn_scores feed the training loss; rois feed the R-CNN head
        # Note that location and class are predicted for all 9*hh*ww anchors.
        rpn_loc, rpn_score, roi, anchor = self.rpn(features, img_size, scale)

        # Since batch size is one, convert variables to singular form
        # (only batch size 1 is supported, so index out the single element)
        bbox = bbox[0]
        label = label[0]
        rpn_score = rpn_score[0]  # [n_anchor,2]
        rpn_loc = rpn_loc[0]  # [n_anchor,4]

        # ------------------ RPN target assignment -------------------#
        # The RPN predicts for all 9*hh*ww anchors, so gt_rpn_loc and gt_rpn_label
        # cover every anchor; in practice only 256 positive/negative samples are
        # drawn to compute the loss.
        # Convention: positive label=1, negative label=0, invalid or ignored
        # samples label=-1, separated by weights when computing the loss.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            utils.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = utils.totensor(gt_rpn_label).long()
        gt_rpn_loc = utils.totensor(gt_rpn_loc)

        # ------------------ RPN losses -------------------#
        # loc loss (bounding-box regression loss)
        # only positive samples contribute to the loc loss
        rpn_loc_loss = self._fast_rcnn_loc_loss(rpn_loc, gt_rpn_loc,
                                                gt_rpn_label.data,
                                                self.rpn_sigma)

        # cls loss (binary classification loss)
        # samples with label=-1 are ignored
        rpn_cls_loss = F.cross_entropy(rpn_score,
                                       gt_rpn_label.cuda(self.gpu),
                                       ignore_index=-1)

        # ------------------ ROI Network -------------------#
        # ------------------ ROI target assignment -------------------#
        # Sample RoIs and forward
        # it's fine to break the computation graph of rois,
        # consider them as constant input
        # Sample positive/negative RoIs for training the ROIHead (R-CNN) classifier.
        # gt_roi_loc: location offsets; this is the second round of box regression
        # gt_roi_label: N+1 classes, with an extra background class (object or not)
        sample_roi, gt_roi_loc, gt_roi_label = self.proposal_target_creator(
            roi, utils.tonumpy(bbox), utils.tonumpy(label),
            self.loc_normalize_mean, self.loc_normalize_std)
        # NOTE: the sampled RoI batch index is all zero because only batch size 1
        # is supported (this answers the roi_indices question above)

        # ------------------ ROI prediction -------------------#
        # Predictions are only needed for the sampled RoIs, so the head runs after
        # target assignment, yielding class scores roi_cls_score and location
        # offsets roi_cls_loc for sample_roi.
        roi_cls_loc, roi_cls_score = self.roi_head(features, sample_roi)

        n_sample = roi_cls_loc.shape[0]
        roi_cls_loc = roi_cls_loc.view(n_sample, -1,
                                       4)  # [n_sample, n_class+1, 4]
        # roi_cls_loc holds a coordinate prediction for every class, but the loss
        # only needs the one for the ground-truth class:
        # roi_loc is the location prediction for the ground-truth class.
        roi_loc = roi_cls_loc[torch.arange(0, n_sample).long().cuda(self.gpu),
                              utils.totensor(
                                  gt_roi_label).long()]  # [n_sample, 4]
        gt_roi_label = utils.totensor(gt_roi_label).long()
        gt_roi_loc = utils.totensor(gt_roi_loc)

        # loc loss (bounding-box regression loss)
        roi_loc_loss = self._fast_rcnn_loc_loss(roi_loc.contiguous(),
                                                gt_roi_loc, gt_roi_label.data,
                                                self.roi_sigma)

        # cls loss (classification loss over the 21 classes)
        roi_cls_loss = nn.CrossEntropyLoss()(roi_cls_score,
                                             gt_roi_label.cuda(self.gpu))

        losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
        losses = losses + [sum(losses)]

        self.optimizer.zero_grad()
        rpn_loc_loss.backward(retain_graph=True)
        rpn_cls_loss.backward(retain_graph=True)
        roi_loc_loss.backward(retain_graph=True)
        roi_cls_loss.backward()
        self.optimizer.step()

        return LossTuple(*losses)
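
`LossTuple` is referenced but not defined in this listing. A plausible definition, assuming the usual namedtuple pattern with fields matching the `losses` list built above:

from collections import namedtuple

# field order matches [rpn_loc, rpn_cls, roi_loc, roi_cls] + [total]
LossTuple = namedtuple('LossTuple',
                       ['rpn_loc_loss', 'rpn_cls_loss',
                        'roi_loc_loss', 'roi_cls_loss', 'total_loss'])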
Example 12
    model = models.load_model(embedding_matrix=embedding,
                              embedding_size=embedding.shape, **config)
    if config['gpu'] >= 0:
        model.cuda_(config['gpu'])
    model.train()
    optimizer = Adam(model.parameters(), lr=config['lr'])
    scheduler = LambdaLR(optimizer, lambda epoch: config['lr_decay'] ** epoch)

    # training
    for epoch in range(config['epoch_num']):
        scheduler.step()

        train_dataset = ToxicityDataset(x_train, y_train, weights, train_idx)
        train_dataloader = DataLoader(train_dataset,
                                      batch_size=config['batch_size'],
                                      shuffle=True,
                                      num_workers=1,
                                      collate_fn=SequenceBucketCollator())

        for batch in tqdm(train_dataloader):
            x = utils.totensor(batch[0], config['gpu']).long()
            y = utils.totensor(batch[1], config['gpu']).float()
            w = utils.totensor(batch[2], config['gpu']).float()

            pred,_ = model(x)

            loss1 = F.binary_cross_entropy(pred[:, 0], y[:, 0], w)
            loss2 = F.binary_cross_entropy(pred[:, 1:], y[:, 1:])
            loss = loss_scale * loss1 + loss2

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()


    del train_dataset
    optimizer = Adam(model.parameters(), lr=config['lr'],
                     weight_decay=config['weight_decay'])
    # scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lambda epoch: config['lr_decay'] ** epoch)

    # train
    model.train()
    for epoch in range(config['epoch_num']):
        # scheduler.step()

        train_dataset = TensorDataset(torch.tensor(x_train, dtype=torch.float),
                                      torch.tensor(y_train, dtype=torch.float),
                                      torch.tensor(w_train, dtype=torch.float))
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=config['batch_size'], shuffle=True)

        for i, (x, y, w) in enumerate(tqdm(train_loader)):
            x = utils.totensor(x, config['gpu']).float()
            y = utils.totensor(y, config['gpu']).float()
            w = utils.totensor(w, config['gpu']).float()

            pred = model(x)

            loss1 = F.binary_cross_entropy(pred[:, 0], y[:, 0], w)
            loss2 = F.binary_cross_entropy(pred[:, 1:], y[:, 1:])
            loss = loss_scale * loss1 + loss2

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # evaluate
    model.eval()
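
Both toxicity-training snippets call a two-argument `utils.totensor(data, gpu)` that is not shown. A minimal sketch, assuming it converts NumPy input to a tensor and moves it to the given GPU when `gpu >= 0`:

import numpy as np
import torch

def totensor(data, gpu=-1):
    # accept NumPy arrays or existing tensors
    if isinstance(data, np.ndarray):
        data = torch.from_numpy(data)
    if gpu >= 0:
        data = data.cuda(gpu)
    return data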
    def train_extracor_and_rpn(self, img, bbox, scale, retain_graph=True):

        _, _, H, W = img.shape
        img_size = (H, W)

        # extractor here is the first 43 layers of VGG16; it extracts the feature map
        img = utils.totensor(img)
        features = self.extractor(img)

        # ------------------ RPN Network -------------------#
        # ------------------ RPN prediction -------------------#
        # Extract RoIs with the RPN network.
        # rpn_locs: regression offsets for every anchor, [1,9*hh*ww,4]
        # rpn_scores: binary (object or not) scores for every anchor, [1,9*hh*ww,2]
        # rois: candidate regions from the RPN, about 2000 during training, [2000,4]
        # roi_indices: all zeros, with the same length as rois; it is the index
        #   within the batch, and is all zero because only batch size 1 is supported
        # rpn_locs and rpn_scores feed the training loss; rois feed the R-CNN head
        # Note that location and class are predicted for all 9*hh*ww anchors.
        rpn_loc, rpn_score, roi, anchor = self.rpn(features,
                                                   img_size,
                                                   scale,
                                                   training=True)
        rpn_score = rpn_score[0]  # [n_anchor,2]
        rpn_loc = rpn_loc[0]  # [n_anchor,4]

        # ------------------ RPN target assignment -------------------#
        # The RPN predicts for all 9*hh*ww anchors, so gt_rpn_loc and gt_rpn_label
        # cover every anchor; in practice only 256 positive/negative samples are
        # drawn to compute the loss.
        # Convention: positive label=1, negative label=0, invalid or ignored
        # samples label=-1 with loc=0, separated by weights when computing the loss.
        gt_rpn_loc, gt_rpn_label = self.anchor_target_creator(
            utils.tonumpy(bbox), anchor, img_size)
        gt_rpn_label = utils.totensor(gt_rpn_label).long()
        gt_rpn_loc = utils.totensor(gt_rpn_loc)


        # ------------------ RPN losses -------------------#
        # loc loss (bounding-box regression loss)
        # only positive samples contribute to the loc loss
        rpn_loc_loss = self._rpn_loc_loss(rpn_loc, gt_rpn_loc,
                                          gt_rpn_label.data)

        # cls loss (binary classification loss)
        # samples with label=-1 are ignored
        rpn_cls_loss = self._rpn_cls_loss(rpn_score, gt_rpn_label)

        rpn_loss = rpn_loc_loss + rpn_cls_loss
        self.extractor_rpn_optimizer.zero_grad()
        rpn_loss.backward(retain_graph=retain_graph)
        self.extractor_rpn_optimizer.step()

        return features, roi, rpn_loc_loss, rpn_cls_loss
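
`anchor_target_creator` assigns the 1/0/-1 labels described above. A compressed sketch of the standard assignment rule, assuming the usual IoU thresholds and an assumed `compute_iou` helper (both illustrative):

import numpy as np

def assign_anchor_labels(anchor, bbox, pos_thresh=0.7, neg_thresh=0.3,
                         n_sample=256, pos_ratio=0.5):
    # compute_iou is an assumed helper returning an (n_anchor, n_gt) IoU matrix
    iou = compute_iou(anchor, bbox)
    label = np.full(len(anchor), -1, dtype=np.int32)
    max_iou = iou.max(axis=1)
    label[max_iou < neg_thresh] = 0       # clear negatives
    label[iou.argmax(axis=0)] = 1         # best anchor for each gt box
    label[max_iou >= pos_thresh] = 1      # high-overlap anchors

    # subsample so that at most n_sample anchors contribute to the loss
    n_pos = int(n_sample * pos_ratio)
    pos = np.where(label == 1)[0]
    if len(pos) > n_pos:
        label[np.random.choice(pos, len(pos) - n_pos, replace=False)] = -1
    neg = np.where(label == 0)[0]
    n_neg = n_sample - (label == 1).sum()
    if len(neg) > n_neg:
        label[np.random.choice(neg, len(neg) - n_neg, replace=False)] = -1
    return label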
Example 15
    def train(self):
        epochs = 1000
        self.genA2B.train(), self.genB2A.train(), self.disGA.train(
        ), self.disGB.train(), self.disLA.train(), self.disLB.train()
        print('training start!')
        start_time = time.time()
        '''load the pretrained models'''
        if self.pretrain:
            str_genA2B = "Parameters/genA2B%03d.pdparams" % (self.start - 1)
            str_genB2A = "Parameters/genB2A%03d.pdparams" % (self.start - 1)
            str_disGA = "Parameters/disGA%03d.pdparams" % (self.start - 1)
            str_disGB = "Parameters/disGB%03d.pdparams" % (self.start - 1)
            str_disLA = "Parameters/disLA%03d.pdparams" % (self.start - 1)
            str_disLB = "Parameters/disLB%03d.pdparams" % (self.start - 1)
            genA2B_para, gen_A2B_opt = fluid.load_dygraph(str_genA2B)
            genB2A_para, gen_B2A_opt = fluid.load_dygraph(str_genB2A)
            disGA_para, disGA_opt = fluid.load_dygraph(str_disGA)
            disGB_para, disGB_opt = fluid.load_dygraph(str_disGB)
            disLA_para, disLA_opt = fluid.load_dygraph(str_disLA)
            disLB_para, disLB_opt = fluid.load_dygraph(str_disLB)
            self.genA2B.load_dict(genA2B_para)
            self.genB2A.load_dict(genB2A_para)
            self.disGA.load_dict(disGA_para)
            self.disGB.load_dict(disGB_para)
            self.disLA.load_dict(disLA_para)
            self.disLB.load_dict(disLB_para)
        for epoch in range(self.start, epochs):
            for block_id, data in enumerate(self.train_reader()):
                real_A = np.array([x[0] for x in data], np.float32)
                real_B = np.array([x[1] for x in data], np.float32)
                real_A = totensor(real_A, block_id, 'train')
                real_B = totensor(real_B, block_id, 'train')

                # Update D

                fake_A2B, _, _ = self.genA2B(real_A)
                fake_B2A, _, _ = self.genB2A(real_B)

                real_GA_logit, real_GA_cam_logit, _ = self.disGA(real_A)
                real_LA_logit, real_LA_cam_logit, _ = self.disLA(real_A)
                real_GB_logit, real_GB_cam_logit, _ = self.disGB(real_B)
                real_LB_logit, real_LB_cam_logit, _ = self.disLB(real_B)

                fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
                fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
                fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
                fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)

                D_ad_loss_GA = mse_loss(1, real_GA_logit) + mse_loss(
                    0, fake_GA_logit)
                D_ad_cam_loss_GA = mse_loss(1, real_GA_cam_logit) + mse_loss(
                    0, fake_GA_cam_logit)

                D_ad_loss_LA = mse_loss(1, real_LA_logit) + mse_loss(
                    0, fake_LA_logit)
                D_ad_cam_loss_LA = mse_loss(1, real_LA_cam_logit) + mse_loss(
                    0, fake_LA_cam_logit)

                D_ad_loss_GB = mse_loss(1, real_GB_logit) + mse_loss(
                    0, fake_GB_logit)
                D_ad_cam_loss_GB = mse_loss(1, real_GB_cam_logit) + mse_loss(
                    0, fake_GB_cam_logit)

                D_ad_loss_LB = mse_loss(1, real_LB_logit) + mse_loss(
                    0, fake_LB_logit)
                D_ad_cam_loss_LB = mse_loss(1, real_LB_cam_logit) + mse_loss(
                    0, fake_LB_cam_logit)

                D_loss_A = self.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA +
                                              D_ad_loss_LA + D_ad_cam_loss_LA)
                D_loss_B = self.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB +
                                              D_ad_loss_LB + D_ad_cam_loss_LB)

                Discriminator_loss = D_loss_A + D_loss_B
                Discriminator_loss.backward()
                self.D_opt.minimize(Discriminator_loss)
                self.disGA.clear_gradients(), self.disGB.clear_gradients(
                ), self.disLA.clear_gradients(), self.disLB.clear_gradients()

                # Update G

                fake_A2B, fake_A2B_cam_logit, _ = self.genA2B(real_A)
                fake_B2A, fake_B2A_cam_logit, _ = self.genB2A(real_B)
                print("fake_A2B.shape:", fake_A2B.shape)
                fake_A2B2A, _, _ = self.genB2A(fake_A2B)
                fake_B2A2B, _, _ = self.genA2B(fake_B2A)

                fake_A2A, fake_A2A_cam_logit, _ = self.genB2A(real_A)
                fake_B2B, fake_B2B_cam_logit, _ = self.genA2B(real_B)

                fake_GA_logit, fake_GA_cam_logit, _ = self.disGA(fake_B2A)
                fake_LA_logit, fake_LA_cam_logit, _ = self.disLA(fake_B2A)
                fake_GB_logit, fake_GB_cam_logit, _ = self.disGB(fake_A2B)
                fake_LB_logit, fake_LB_cam_logit, _ = self.disLB(fake_A2B)

                G_ad_loss_GA = mse_loss(1, fake_GA_logit)
                G_ad_cam_loss_GA = mse_loss(1, fake_GA_cam_logit)

                G_ad_loss_LA = mse_loss(1, fake_LA_logit)
                G_ad_cam_loss_LA = mse_loss(1, fake_LA_cam_logit)

                G_ad_loss_GB = mse_loss(1, fake_GB_logit)
                G_ad_cam_loss_GB = mse_loss(1, fake_GB_cam_logit)

                G_ad_loss_LB = mse_loss(1, fake_LB_logit)
                G_ad_cam_loss_LB = mse_loss(1, fake_LB_cam_logit)

                G_recon_loss_A = self.L1loss(fake_A2B2A, real_A)
                G_recon_loss_B = self.L1loss(fake_B2A2B, real_B)

                G_identity_loss_A = self.L1loss(fake_A2A, real_A)
                G_identity_loss_B = self.L1loss(fake_B2B, real_B)

                G_cam_loss_A = bce_loss(1, fake_B2A_cam_logit) + bce_loss(
                    0, fake_A2A_cam_logit)
                G_cam_loss_B = bce_loss(1, fake_A2B_cam_logit) + bce_loss(
                    0, fake_B2B_cam_logit)

                G_loss_A = self.adv_weight * (
                    G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA +
                    G_ad_cam_loss_LA
                ) + self.cycle_weight * G_recon_loss_A + self.identity_weight * G_identity_loss_A + self.cam_weight * G_cam_loss_A
                G_loss_B = self.adv_weight * (
                    G_ad_loss_GB + G_ad_cam_loss_GB + G_ad_loss_LB +
                    G_ad_cam_loss_LB
                ) + self.cycle_weight * G_recon_loss_B + self.identity_weight * G_identity_loss_B + self.cam_weight * G_cam_loss_B

                Generator_loss = G_loss_A + G_loss_B
                Generator_loss.backward()
                self.G_opt.minimize(Generator_loss)
                self.genA2B.clear_gradients(), self.genB2A.clear_gradients()

                print("[%5d/%5d] time: %4.4f d_loss: %.5f, g_loss: %.5f" %
                      (epoch, block_id, time.time() - start_time,
                       Discriminator_loss.numpy(), Generator_loss.numpy()))
                print("G_loss_A: %.5f G_loss_B: %.5f" %
                      (G_loss_A.numpy(), G_loss_B.numpy()))
                print("G_ad_loss_GA: %.5f   G_ad_loss_GB: %.5f" %
                      (G_ad_loss_GA.numpy(), G_ad_loss_GB.numpy()))
                print("G_ad_loss_LA: %.5f   G_ad_loss_LB: %.5f" %
                      (G_ad_loss_LA.numpy(), G_ad_loss_LB.numpy()))
                print("G_cam_loss_A:%.5f  G_cam_loss_B:%.5f" %
                      (G_cam_loss_A.numpy(), G_cam_loss_B.numpy()))
                print("G_recon_loss_A:%.5f  G_recon_loss_B:%.5f" %
                      (G_recon_loss_A.numpy(), G_recon_loss_B.numpy()))
                print("G_identity_loss_A:%.5f  G_identity_loss_B:%.5f" %
                      (G_identity_loss_A.numpy(), G_identity_loss_B.numpy()))

                if epoch % 2 == 1 and block_id % self.print_freq == 0:

                    A2B = np.zeros((self.img_size * 7, 0, 3))
                    # B2A = np.zeros((self.img_size * 7, 0, 3))
                    for eval_id, eval_data in enumerate(self.test_reader()):
                        if eval_id == 10:
                            break
                        real_A = np.array([x[0] for x in eval_data],
                                          np.float32)
                        real_B = np.array([x[1] for x in eval_data],
                                          np.float32)
                        real_A = totensor(real_A, eval_id, 'eval')
                        real_B = totensor(real_B, eval_id, 'eval')

                        fake_A2B, _, fake_A2B_heatmap = self.genA2B(real_A)
                        fake_B2A, _, fake_B2A_heatmap = self.genB2A(real_B)

                        fake_A2B2A, _, fake_A2B2A_heatmap = self.genB2A(
                            fake_A2B)
                        fake_B2A2B, _, fake_B2A2B_heatmap = self.genA2B(
                            fake_B2A)

                        fake_A2A, _, fake_A2A_heatmap = self.genB2A(real_A)
                        fake_B2B, _, fake_B2B_heatmap = self.genA2B(real_B)

                        a = tensor2numpy(denorm(real_A[0]))
                        b = cam(tensor2numpy(fake_A2A_heatmap[0]),
                                self.img_size)
                        c = tensor2numpy(denorm(fake_A2A[0]))
                        d = cam(tensor2numpy(fake_A2B_heatmap[0]),
                                self.img_size)
                        e = tensor2numpy(denorm(fake_A2B[0]))
                        f = cam(tensor2numpy(fake_A2B2A_heatmap[0]),
                                self.img_size)
                        g = tensor2numpy(denorm(fake_A2B2A[0]))
                        A2B = np.concatenate((A2B, (np.concatenate(
                            (a, b, c, d, e, f, g)) * 255).astype(np.uint8)),
                                             1).astype(np.uint8)
                    A2B = Image.fromarray(A2B)
                    A2B.save('Images/%d_%d.png' % (epoch, block_id))
                    self.genA2B.train(), self.genB2A.train(), self.disGA.train(
                    ), self.disGB.train(), self.disLA.train(
                    ), self.disLB.train()
            if epoch % 4 == 0:
                fluid.save_dygraph(self.genA2B.state_dict(),
                                   "Parameters/genA2B%03d" % (epoch))
                fluid.save_dygraph(self.genB2A.state_dict(),
                                   "Parameters/genB2A%03d" % (epoch))
                fluid.save_dygraph(self.disGA.state_dict(),
                                   "Parameters/disGA%03d" % (epoch))
                fluid.save_dygraph(self.disGB.state_dict(),
                                   "Parameters/disGB%03d" % (epoch))
                fluid.save_dygraph(self.disLA.state_dict(),
                                   "Parameters/disLA%03d" % (epoch))
                fluid.save_dygraph(self.disLB.state_dict(),
                                   "Parameters/disLB%03d" % (epoch))
    model.eval()

    # on train set
    train_dataset = ToxicityTestDataset(x_train, np.zeros((train_num, )))
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=config['batch_size'],
                                  shuffle=False,
                                  num_workers=1,
                                  collate_fn=SequenceBucketCollator())

    train_feature_fold = []
    train_pred_fold = []

    with torch.no_grad():
        for x, _ in tqdm(train_dataloader):
            x = utils.totensor(x, config['gpu']).long()

            pred, feature = model(x)
            pred = pred[:, 0]
            train_feature_fold.append(feature)
            train_pred_fold.append(pred)

    train_feature_fold = torch.cat(train_feature_fold, dim=0)
    train_features.append(train_feature_fold.cpu().numpy())

    train_pred_fold = torch.cat(train_pred_fold, dim=0)
    train_pred_fold = train_pred_fold.cpu().numpy()
    validate_pred_fold = train_pred_fold[validate_idx]
    train_preds.append(
        pd.DataFrame({
            'id': validate_idx,