Example #1
import os

import numpy as np
import torch
from PIL import Image

# The snippet begins inside a per-video loop; the loop header below is a
# reconstruction. data_dir, result_dir, gpu, model_raft and InputPadder are
# assumed to be defined earlier in the original script.
for video in sorted(os.listdir(data_dir)):
    if video[0] == '.':  # skip hidden entries
        continue
    imglist_ = sorted(os.listdir(os.path.join(data_dir, video, 'origin')))
    for i, img in enumerate(imglist_[:-1]):
        if img[0] == '.':
            continue
        # load a pair of consecutive frames
        next_img = imglist_[i + 1]
        imgname = img
        next_imgname = next_img
        img = Image.open(os.path.join(data_dir, video, 'origin', img))
        next_img = Image.open(os.path.join(data_dir, video, 'origin',
                                           next_img))
        # HWC uint8 arrays -> NCHW float tensors, padded to a size RAFT accepts
        image1 = torch.from_numpy(np.array(img)).float()
        image2 = torch.from_numpy(np.array(next_img)).float()
        padder = InputPadder(image1.size()[:2])
        image1 = image1.unsqueeze(0).permute(0, 3, 1, 2)
        image2 = image2.unsqueeze(0).permute(0, 3, 1, 2)
        image1 = padder.pad(image1).cuda(gpu)
        image2 = padder.pad(image2).cuda(gpu)
        # estimate optical flow with RAFT; no gradients needed
        with torch.no_grad():
            model_raft.eval()
            _, flow = model_raft(image1, image2, iters=20, test_mode=True)
            flow = padder.unpad(flow)

        flow = flow.data.cpu()
        # load the segmentation prediction for the current frame
        pred = Image.open(
            os.path.join(result_dir, video,
                         imgname.split('.')[0] + '.png'))
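
All three examples feed image pairs through RAFT, which needs spatial dimensions divisible by 8, so each one wraps its inputs in an InputPadder. Note that these snippets pass a single tensor to padder.pad(...) and get a single tensor back, so they appear to use a one-input variant of the padder utility shipped with the RAFT repository. A minimal sketch of such a variant, assuming the usual pad-to-the-next-multiple-of-8 behavior on NCHW float tensors (the class and method names follow the usage above; the padding distribution is an assumption):

import torch.nn.functional as F

class InputPadder:
    """Pad an NCHW tensor so H and W become multiples of 8 (sketch)."""

    def __init__(self, dims):
        h, w = dims  # (H, W) of the unpadded images
        pad_h = (8 - h % 8) % 8
        pad_w = (8 - w % 8) % 8
        # F.pad order: (left, right, top, bottom)
        self._pad = [pad_w // 2, pad_w - pad_w // 2,
                     pad_h // 2, pad_h - pad_h // 2]

    def pad(self, x):
        return F.pad(x, self._pad, mode='replicate')

    def unpad(self, x):
        h, w = x.shape[-2:]
        left, right, top, bottom = self._pad
        return x[..., top:h - bottom, left:w - right]

With this definition, padder.unpad(flow) in the loop above crops the estimated flow back to the original frame size.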
Example #2
    def forward(self, feed_dict, *, segSize=None):
        if feed_dict is None:
            # empty-batch placeholder
            return torch.zeros((0, self.args.num_class, 480, 720)).cuda()
        c_img = feed_dict['img_data']           # current frame
        clip_imgs = feed_dict['clipimgs_data']  # preceding frame(s)
        label = feed_dict['seg_label']
        clip_num = len(clip_imgs)
        assert clip_num == 1
        n, _, h, w = label.size()
        c_pre_img = clip_imgs[0]
        # undo the dataset normalization to recover 0-255 images for RAFT
        mean = self.mean.view(1, -1, 1, 1).to(c_img.device)
        std = self.std.view(1, -1, 1, 1).to(c_img.device)
        c_img_f = (c_img * std + mean) * 255.
        c_pre_img_f = (c_pre_img * std + mean) * 255.
        # estimate optical flow between the two frames with a frozen RAFT
        with torch.no_grad():
            self.raft.eval()
            padder = InputPadder((h, w))
            c_img_f_ = padder.pad(c_img_f)
            c_pre_img_f_ = padder.pad(c_pre_img_f)
            _, flow = self.raft(c_img_f_,
                                c_pre_img_f_,
                                iters=20,
                                test_mode=True)
            flow = padder.unpad(flow)

        # refine the raw flow with a small CNN conditioned on both frames
        flow = self.flowcnn(c_img_f, c_pre_img_f, flow)
        # encode current and previous frames in one batch
        enc_in = torch.cat([c_img, c_pre_img], 0)
        clip_tmp = self.encoder(enc_in, return_feature_maps=True)
        c_img_f1, c_pre_img_f1 = torch.chunk(clip_tmp[-1], 2, dim=0)
        # warp the previous frame's deepest features to the current frame
        flow_1 = F.interpolate(flow, c_img_f1.size()[-2:], mode='nearest')
        c_img_f1_warp = flowwarp(c_pre_img_f1, flow_1)
        # fuse current and warped features with learned per-channel weights
        w0_0 = self.w0_0.view(1, -1, 1, 1).to(c_img_f1.device)
        w0_1 = self.w0_1.view(1, -1, 1, 1).to(c_img_f1_warp.device)
        new_c_img_f1 = w0_0 * c_img_f1 + w0_1 * c_img_f1_warp
        clip_tmp[-1] = torch.cat([new_c_img_f1, c_pre_img_f1], 0)
        pred_deepsup_s, _, clip_tmp2 = self.decoder(clip_tmp)
        c_img_f2, c_pre_img_f2 = torch.chunk(clip_tmp2, 2, dim=0)
        # repeat the warp-and-fuse step on the decoder features
        flow_2 = F.interpolate(flow, c_img_f2.size()[-2:], mode='nearest')
        c_img_f2_warp = flowwarp(c_pre_img_f2, flow_2)
        w1_0 = self.w1_0.view(1, -1, 1, 1).to(c_img_f2)
        w1_1 = self.w1_1.view(1, -1, 1, 1).to(c_img_f2_warp)
        new_feat = w1_0 * c_img_f2 + w1_1 * c_img_f2_warp
        pred_ = self.conv_last_(new_feat)

        if segSize is not None:
            # inference: upsample logits to the requested size, return probabilities
            pred_ = nn.functional.interpolate(pred_,
                                              size=segSize,
                                              mode='bilinear',
                                              align_corners=False)
            return nn.functional.softmax(pred_, dim=1)
        else:
            # training: cross-entropy on log-probabilities at label resolution
            pred_ = nn.functional.log_softmax(pred_, dim=1)
            label = label.squeeze(1).long()
            pred_ = F.interpolate(pred_, (h, w),
                                  mode='bilinear',
                                  align_corners=False)
            loss = self.crit(pred_, label)
            if self.deep_sup_scale is not None:
                # deep supervision on the current frame's auxiliary head only
                pred_deepsup_s = torch.chunk(pred_deepsup_s, 2, dim=0)
                pred_deepsup = F.interpolate(pred_deepsup_s[0], (h, w),
                                             mode='bilinear',
                                             align_corners=False)
                loss_deepsup = self.crit(pred_deepsup, label)
                loss = loss + loss_deepsup * self.deep_sup_scale
            acc = self.pixel_acc(pred_, label)
            return loss, acc
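
Both this example and the next call a flowwarp(x, flow) helper to backward-warp images or feature maps with the estimated flow. Its definition is not shown, but a common implementation, and a plausible sketch of what is assumed here (flow in pixel units, shaped (N, 2, H, W)), builds a pixel grid, displaces it by the flow, and samples with grid_sample:

import torch
import torch.nn.functional as F

def flowwarp(x, flow):
    """Backward-warp x (N, C, H, W) by flow (N, 2, H, W) in pixels (sketch)."""
    n, _, h, w = x.size()
    # base pixel coordinates as an (N, 2, H, W) grid of (x, y) positions
    xs = torch.arange(w, device=x.device).view(1, 1, 1, w).expand(n, 1, h, w)
    ys = torch.arange(h, device=x.device).view(1, 1, h, 1).expand(n, 1, h, w)
    grid = torch.cat((xs, ys), dim=1).float()
    # displace by the flow, then normalize coordinates to [-1, 1]
    vgrid = grid + flow
    vgrid_x = 2.0 * vgrid[:, 0] / max(w - 1, 1) - 1.0
    vgrid_y = 2.0 * vgrid[:, 1] / max(h - 1, 1) - 1.0
    vgrid = torch.stack((vgrid_x, vgrid_y), dim=-1)  # (N, H, W, 2)
    return F.grid_sample(x, vgrid, align_corners=True)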
Example #3
    def forward(self, feed_dict, *, segSize=None):
        if feed_dict is None:
            # empty-batch placeholder
            return torch.zeros((0, self.args.num_class, 480, 720)).cuda()
        if segSize is None:
            # training: use the previous frame and optical flow
            c_img = feed_dict['img_data']           # current frame
            clip_imgs = feed_dict['clipimgs_data']  # preceding frame(s)
            label = feed_dict['seg_label']
            clip_num = len(clip_imgs)
            assert clip_num == 1
            n, _, h, w = label.size()
            c_pre_img = clip_imgs[0]
            # undo the dataset normalization to recover 0-255 images for RAFT
            mean = self.mean.view(1, -1, 1, 1).to(c_img.device)
            std = self.std.view(1, -1, 1, 1).to(c_img.device)
            c_img_f = (c_img * std + mean) * 255.
            c_pre_img_f = (c_pre_img * std + mean) * 255.
            # estimate optical flow between the two frames with a frozen RAFT
            with torch.no_grad():
                self.raft.eval()
                padder = InputPadder((h, w))
                c_img_f_ = padder.pad(c_img_f)
                c_pre_img_f_ = padder.pad(c_pre_img_f)
                _, flow = self.raft(c_img_f_,
                                    c_pre_img_f_,
                                    iters=20,
                                    test_mode=True)
                flow = padder.unpad(flow)

            # encode current and previous frames in one batch
            enc_in = torch.cat([c_img, c_pre_img], 0)
            clip_tmp = self.encoder(enc_in, return_feature_maps=True)
            clip_tmp2, pred_deepsup_s = self.decoder(clip_tmp)
            c_img_f2, c_pre_img_f2 = torch.chunk(clip_tmp2, 2, dim=0)
            pred_ = self.conv_last_(clip_tmp2)
            c_pred_, c_pre_pred_ = torch.chunk(pred_, 2, dim=0)

            # cross-entropy on the current frame
            c_pred_1 = nn.functional.log_softmax(c_pred_, dim=1)
            label = label.squeeze(1).long()
            c_pred_1 = F.interpolate(c_pred_1, (h, w),
                                     mode='bilinear',
                                     align_corners=False)
            loss = self.crit(c_pred_1, label)

            # deep supervision on both frames of the clip
            clip_label = list(feed_dict['cliplabels_data'])  # copy so feed_dict is not mutated
            clip_label.append(feed_dict['seg_label'])
            clip_label = torch.cat(clip_label, dim=0).squeeze(1).long()
            pred_deepsup_s = nn.functional.log_softmax(pred_deepsup_s, dim=1)
            pred_deepsup = F.interpolate(pred_deepsup_s, (h, w),
                                         mode='bilinear',
                                         align_corners=False)
            loss_deepsup = self.crit(pred_deepsup, clip_label)
            loss = loss + loss_deepsup * self.deep_sup_scale

            # short-term temporal consistency: warp the previous frame's
            # prediction to the current frame and compare under an
            # occlusion-aware mask derived from the photometric error
            flow = F.interpolate(flow, (h, w), mode='nearest')
            c_pre_pred_ = F.interpolate(c_pre_pred_, (h, w),
                                        mode='bilinear',
                                        align_corners=False)
            c_pred_ = F.interpolate(c_pred_, (h, w),
                                    mode='bilinear',
                                    align_corners=False)
            warp_i1 = flowwarp(c_pre_img, flow)
            warp_o1 = flowwarp(c_pre_pred_, flow)
            noc_mask2 = torch.exp(
                -1 * torch.abs(torch.sum(c_img - warp_i1, dim=1))).unsqueeze(1)
            ST_loss = self.args.st_weight * self.criterion_flow(
                c_pred_ * noc_mask2, warp_o1 * noc_mask2)
            loss = loss + ST_loss
            acc = self.pixel_acc(c_pred_1, label)
            return loss, acc
        else:
            # inference: single-frame path; no previous frame or flow needed
            c_img = feed_dict['img_data']
            c_tmp = self.encoder(c_img, return_feature_maps=True)
            c_tmp2, pred_deepsup_s = self.decoder(c_tmp)
            c_pred_ = self.conv_last_(c_tmp2)
            c_pred_ = nn.functional.interpolate(c_pred_,
                                                size=segSize,
                                                mode='bilinear',
                                                align_corners=False)
            return nn.functional.softmax(c_pred_, dim=1)
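
The distinctive part of the training branch above is the short-term temporal consistency term: the previous frame's prediction is warped to the current frame, and an occlusion-aware mask, the exponential of the negative photometric error between the current image and the warped previous image, down-weights pixels where the warp is unreliable. Isolated as a function (a sketch mirroring the ST_loss computation above; flowwarp is the sketch shown earlier, and the L1 criterion and st_weight default are stand-ins for the model's criterion_flow and args.st_weight):

import torch
import torch.nn as nn

def temporal_consistency_loss(c_img, c_pre_img, c_pred, c_pre_pred, flow,
                              st_weight=1.0, criterion_flow=nn.L1Loss()):
    """Occlusion-masked consistency between the current prediction and the
    warped previous prediction (sketch)."""
    warp_i1 = flowwarp(c_pre_img, flow)   # previous image, warped to current frame
    warp_o1 = flowwarp(c_pre_pred, flow)  # previous prediction, warped likewise
    # large photometric error suggests occlusion -> weight those pixels down
    noc_mask = torch.exp(-torch.abs(torch.sum(c_img - warp_i1,
                                              dim=1))).unsqueeze(1)
    return st_weight * criterion_flow(c_pred * noc_mask, warp_o1 * noc_mask)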