Esempio n. 1
0
def Run_video_motion(model,
                     batch,
                     Mem_every=None,
                     Mem_number=None,
                     mode='train'):
    """Propagate the first-frame mask through a clip, one frame at a time.

    For every frame ``t >= 1`` the model is invoked twice: first on frame
    ``t - 1`` together with its current mask estimate to obtain a key/value
    pair, then on frame ``t`` together with the accumulated keys/values and
    the rounded previous mask to obtain segmentation logits.  Channel 1 of
    the softmaxed logits is written back as the estimate for frame ``t``.

    Args:
        model: Callable taking a list of inputs.  Called with two inputs it
            must return ``(key, value)``; called with four inputs it must
            return ``(logits, p_m2, p_m3)``.
        batch: Mapping with entries ``'Fs'`` (frames), ``'Ms'`` (masks) and
            ``'info'``.  ``Fs`` is unpacked as ``[b, c, t, h, w]``.  ``'info'``
            is looked up but not otherwise used here.
        Mem_every: If truthy, frames ``0, Mem_every, 2 * Mem_every, ...`` are
            kept permanently in the memory.
        Mem_number: Consulted only when ``Mem_every`` is falsy; selects
            ``Mem_number + 1`` roughly evenly spaced frames starting at 0.
        mode: ``'train'`` or ``'val'`` accumulate the loss and return
            ``(loss, mIoU)``; ``'test'`` returns ``(pred, Es)``.  Any other
            value makes the function return ``None``.

    Returns:
        ``(loss_video / num_frames, video_mIoU)`` for ``'train'`` / ``'val'``
        (the loss is summed over frames ``1 .. num_frames - 1``), or
        ``(pred, Es)`` for ``'test'`` where ``pred`` is ``Es`` rounded.

    Raises:
        NotImplementedError: If both ``Mem_every`` and ``Mem_number`` are
            falsy.

    Note:
        Working tensors are created with ``.cuda()``, so a CUDA device is
        required.
    """
    Fs = batch['Fs']
    Ms = batch['Ms']
    info = batch['info']  # looked up for parity with the sibling runners
    num_frames = Fs.shape[2]

    # Frames whose key/value pair stays in the memory for the rest of the clip.
    if Mem_every:
        stride_marks = np.arange(0, num_frames, step=Mem_every)
        to_memorize = [int(mark) for mark in stride_marks]
    elif Mem_number:
        even_marks = np.linspace(0, num_frames, num=Mem_number + 2)[:-1]
        to_memorize = [int(round(mark)) for mark in even_marks]
    else:
        raise NotImplementedError

    n_batch, _, n_steps, height, width = Fs.shape
    # Foreground estimates laid out as [b, c, t, h, w]; frame 0 is seeded
    # with the provided mask.
    Es = torch.zeros((n_batch, 1, n_steps, height, width)).float().cuda()
    Es[:, :, 0] = Ms[:, :, 0]

    loss_video = torch.tensor(0.0).cuda()
    supervised = mode in ('train', 'val')

    for cur in range(1, num_frames):
        prev = cur - 1

        # Memorize: encode the previous frame with its mask estimate.
        prev_key, prev_value = model([Fs[:, :, prev], Es[:, :, prev]])
        prev_key = prev_key.unsqueeze(2)
        prev_value = prev_value.unsqueeze(2)

        if prev == 0:
            # Nothing stored yet: the memory is just the first frame.
            mem_keys, mem_values = prev_key, prev_value
        else:
            mem_keys = torch.cat([keys, prev_key], dim=2)
            mem_values = torch.cat([values, prev_value], dim=2)

        # Segment the current frame; the previous mask is binarised and
        # detached before being handed to the model.
        prev_mask = torch.round(Es[:, :, prev].detach()).float()
        logits, p_m2, p_m3 = model(
            [Fs[:, :, cur], mem_keys, mem_values, prev_mask])
        Es[:, 0, cur] = F.softmax(logits, dim=1)[:, 1]

        if supervised:
            target = Ms[:, 0, cur].cuda()
            loss_video += (_loss(logits, target) +
                           0.5 * _loss(p_m2, target) +
                           0.25 * _loss(p_m3, target))

        # Only scheduled frames extend the persistent memory; other frames
        # were used for this step alone.
        if prev in to_memorize:
            keys, values = mem_keys, mem_values

    pred = torch.round(Es.float().cuda())
    if supervised:
        # Mean over the batch of the per-video mIoU.
        iou_sum = sum(
            get_video_mIoU(pred[n], Ms[n].cuda()) for n in range(len(Ms)))
        video_mIoU = iou_sum / len(Ms)
        return loss_video / num_frames, video_mIoU

    if mode == 'test':
        return pred, Es
Esempio n. 2
0
def Run_video_enhanced_motion(model,
                              batch,
                              Mem_every=None,
                              Mem_number=None,
                              mode='train'):
    """Run memory-based propagation with a first-frame object template.

    Before the frame loop, each sample's first frame is multiplied by its
    first mask, cropped to the mask's bounding rectangle and resized to a
    quarter of the frame size.  The resulting template tensor is passed to
    the model on every segmentation call, alongside the accumulated
    keys/values and the rounded previous mask.

    Args:
        model: Callable taking a list of inputs.  With two inputs it must
            return ``(key, value)``; with five inputs it must return
            ``(logits, p_m2, p_m3)``.  In ``'train'`` mode
            ``model.module.Memory`` is switched between train and eval.
        batch: Mapping with ``'Fs'`` (frames, unpacked as ``[b, c, t, h, w]``),
            ``'Ms'`` (masks) and ``'info'``; ``info['num_frames'][0]`` gives
            the clip length and ``info['intervals'][t][0]`` the interval for
            frame ``t``.
        Mem_every: If truthy, frames ``0, Mem_every, 2 * Mem_every, ...`` are
            kept permanently in the memory.
        Mem_number: Consulted only when ``Mem_every`` is falsy; selects
            ``Mem_number + 1`` roughly evenly spaced frames starting at 0.
        mode: ``'train'`` / ``'val'`` return ``(loss, mIoU)``; ``'test'``
            returns ``(pred, Es)``; anything else returns ``None``.

    Returns:
        ``(loss / num_frames, video_mIoU)`` or ``(pred, Es)`` depending on
        ``mode``.

    Raises:
        NotImplementedError: If both ``Mem_every`` and ``Mem_number`` are
            falsy.
        AssertionError: If a sample's first-frame mask is entirely zero.

    Note:
        Working tensors are created with ``.cuda()``, so a CUDA device is
        required.
    """
    Fs, Ms, info = batch['Fs'], batch['Ms'], batch['info']
    num_frames = info['num_frames'][0].item()
    intervals = info['intervals']

    # Frames whose key/value pair stays in the memory for the rest of the clip.
    if Mem_every:
        stride_marks = np.arange(0, num_frames, step=Mem_every)
        to_memorize = [int(mark) for mark in stride_marks]
    elif Mem_number:
        even_marks = np.linspace(0, num_frames, num=Mem_number + 2)[:-1]
        to_memorize = [int(round(mark)) for mark in even_marks]
    else:
        raise NotImplementedError

    b, c, f, h, w = Fs.shape
    # Foreground estimates laid out as [b, c, t, h, w]; frame 0 is seeded
    # with the provided mask.
    Es = torch.zeros((b, 1, f, h, w)).float().cuda()
    Es[:, :, 0] = Ms[:, :, 0]

    # Running loss; accumulated in place inside the frame loop.
    loss_video = torch.tensor(0.0).cuda()

    # Build one quarter-resolution template per sample from the masked
    # first frame.
    tmpl_h, tmpl_w = int(h / 4), int(w / 4)
    templates = torch.zeros((b, c, tmpl_h, tmpl_w))
    first_mask = Ms[:, :, 0].detach()
    masked_first = Fs[:, :, 0].detach() * first_mask.repeat(
        1, 3, 1, 1).type(torch.float)
    for i in range(b):
        mask_np = first_mask[i].squeeze(0).cpu().numpy().astype(np.uint8)
        assert np.any(mask_np)
        x, y, box_w, box_h = cv2.boundingRect(mask_np)
        crop = masked_first[i, :, y:(y + box_h), x:(x + box_w)].cpu().numpy()
        # cv2.resize works on HWC arrays and takes the size as (width, height).
        crop = cv2.resize(crop.transpose(1, 2, 0), (tmpl_w, tmpl_h))
        templates[i] = torch.from_numpy(crop.transpose(2, 0, 1))

    supervised = mode in ('train', 'val')

    for cur in range(1, num_frames):
        prev = cur - 1
        interval = intervals[cur][0].item()
        if mode == 'train':
            # The Memory sub-module runs in train mode only when the
            # interval for this frame equals 1.
            memory_net = model.module.Memory
            if interval == 1:
                memory_net.train()
            else:
                memory_net.eval()

        # Memorize: encode the previous frame with its mask estimate.
        prev_key, prev_value = model([Fs[:, :, prev], Es[:, :, prev]])
        prev_key = prev_key.unsqueeze(2)
        prev_value = prev_value.unsqueeze(2)

        if prev == 0:
            mem_keys, mem_values = prev_key, prev_value
        else:
            mem_keys = torch.cat([keys, prev_key], dim=2)
            mem_values = torch.cat([values, prev_value], dim=2)

        # Segment the current frame.
        prev_mask = torch.round(Es[:, :, prev].detach()).float()
        logits, p_m2, p_m3 = model(
            [Fs[:, :, cur], templates, mem_keys, mem_values, prev_mask])
        Es[:, 0, cur] = F.softmax(logits, dim=1)[:, 1]

        if supervised:
            target = Ms[:, 0, cur].cuda()
            loss_video += (_loss(logits, target) +
                           0.5 * _loss(p_m2, target) +
                           0.25 * _loss(p_m3, target))

        # Only scheduled frames extend the persistent memory.
        if prev in to_memorize:
            keys, values = mem_keys, mem_values

    pred = torch.round(Es.float().cuda())
    if supervised:
        # Mean over the batch of the per-video mIoU.
        iou_sum = sum(
            get_video_mIoU(pred[n], Ms[n].cuda()) for n in range(len(Ms)))
        video_mIoU = iou_sum / len(Ms)
        return loss_video / num_frames, video_mIoU

    if mode == 'test':
        return pred, Es
Esempio n. 3
0
def Run_video_hkf(model, batch, Mem_every=1, Mem_number=None, mode='train'):
    """Propagate the first-frame mask using only the immediately previous frame.

    No key/value memory is built here: for every frame ``t >= 1`` the model
    receives the current frame, the previous frame and the previous mask
    estimate, and channel 1 of the softmaxed logits becomes the estimate for
    frame ``t``.

    Args:
        model: Callable taking ``[frame_t, frame_t_minus_1, mask_t_minus_1]``
            and returning ``(logits, p_m2, p_m3)``.
        batch: Mapping with ``'Fs'`` (frames, unpacked as ``[b, c, t, h, w]``),
            ``'Ms'`` (masks) and ``'info'``; ``info['num_frames'][0]`` gives
            the clip length.
        Mem_every: Unused; kept so the signature matches the other runners.
        Mem_number: Unused; kept so the signature matches the other runners.
        mode: ``'train'`` / ``'val'`` return ``(loss, mIoU)``; ``'test'``
            returns ``(pred, Es)``; anything else returns ``None``.

    Returns:
        ``(loss / num_frames, video_mIoU)`` or ``(pred, Es)`` depending on
        ``mode``.

    Note:
        Working tensors are created with ``.cuda()``, so a CUDA device is
        required.
    """
    Fs, Ms, info = batch['Fs'], batch['Ms'], batch['info']
    num_frames = info['num_frames'][0].item()

    n_batch, _, n_steps, height, width = Fs.shape
    # Foreground estimates laid out as [b, c, t, h, w]; frame 0 is seeded
    # with the provided mask.
    Es = torch.zeros((n_batch, 1, n_steps, height, width)).float().cuda()
    Es[:, :, 0] = Ms[:, :, 0]

    # Running loss; accumulated in place inside the frame loop.
    loss_video = torch.tensor(0.0).cuda()
    supervised = mode in ('train', 'val')

    for cur in range(1, num_frames):
        prev = cur - 1

        # The previous estimate is passed as-is (neither rounded nor
        # detached).
        logits, p_m2, p_m3 = model(
            [Fs[:, :, cur], Fs[:, :, prev], Es[:, :, prev]])
        Es[:, 0, cur] = F.softmax(logits, dim=1)[:, 1]

        if supervised:
            target = Ms[:, 0, cur].cuda()
            loss_video += (_loss(logits, target) +
                           0.5 * _loss(p_m2, target) +
                           0.25 * _loss(p_m3, target))

    pred = torch.round(Es.float().cuda())
    if supervised:
        # Mean over the batch of the per-video mIoU.
        iou_sum = sum(
            get_video_mIoU(pred[n], Ms[n].cuda()) for n in range(len(Ms)))
        video_mIoU = iou_sum / len(Ms)
        return loss_video / num_frames, video_mIoU

    if mode == 'test':
        return pred, Es
Esempio n. 4
0
def Run_video_enhanced_varysize(model,
                                batch,
                                Mem_every=None,
                                Mem_number=None,
                                mode='train'):
    """Run memory-based propagation with per-sample, native-size templates.

    Each sample's first frame is multiplied by its first mask and cropped to
    the mask's bounding rectangle.  The crops are not resized, so they are
    collected in a Python list of ``[1, c, box_h, box_w]`` tensors, and that
    list is passed to the model on every segmentation call.

    Args:
        model: Callable taking a list of inputs.  With two inputs it must
            return ``(key, value)``; with four inputs (frame, template list,
            keys, values) it must return ``(logits, p_m2, p_m3)``.
        batch: Mapping with ``'Fs'`` (frames, unpacked as ``[b, c, t, h, w]``),
            ``'Ms'`` (masks) and ``'info'``; ``info['num_frames'][0]`` gives
            the clip length.
        Mem_every: If truthy, frames ``0, Mem_every, 2 * Mem_every, ...`` are
            kept permanently in the memory.
        Mem_number: Consulted only when ``Mem_every`` is falsy; selects
            ``Mem_number + 1`` roughly evenly spaced frames starting at 0.
        mode: ``'train'`` / ``'val'`` return ``(loss, mIoU)``; ``'test'``
            returns ``(pred, Es)``; anything else returns ``None``.

    Returns:
        ``(loss / num_frames, video_mIoU)`` or ``(pred, Es)`` depending on
        ``mode``.

    Raises:
        NotImplementedError: If both ``Mem_every`` and ``Mem_number`` are
            falsy.
        AssertionError: If a sample's first-frame mask is entirely zero.

    Note:
        Working tensors are created with ``.cuda()``, so a CUDA device is
        required.
    """
    Fs, Ms, info = batch['Fs'], batch['Ms'], batch['info']
    num_frames = info['num_frames'][0].item()

    # Frames whose key/value pair stays in the memory for the rest of the clip.
    if Mem_every:
        stride_marks = np.arange(0, num_frames, step=Mem_every)
        to_memorize = [int(mark) for mark in stride_marks]
    elif Mem_number:
        even_marks = np.linspace(0, num_frames, num=Mem_number + 2)[:-1]
        to_memorize = [int(round(mark)) for mark in even_marks]
    else:
        raise NotImplementedError

    b, c, f, h, w = Fs.shape
    # Foreground estimates laid out as [b, c, t, h, w]; frame 0 is seeded
    # with the provided mask.
    Es = torch.zeros((b, 1, f, h, w)).float().cuda()
    Es[:, :, 0] = Ms[:, :, 0]

    # One template per sample; sizes differ, hence a list rather than a
    # stacked tensor.
    templates = []
    first_mask = Ms[:, :, 0].detach()
    masked_first = Fs[:, :, 0].detach() * first_mask.repeat(
        1, 3, 1, 1).type(torch.float)
    for i in range(b):
        mask_np = first_mask[i].squeeze(0).cpu().numpy().astype(np.uint8)
        assert np.any(mask_np)
        x, y, box_w, box_h = cv2.boundingRect(mask_np)
        crop = masked_first[i, :, y:(y + box_h), x:(x + box_w)].cpu().numpy()
        holder = torch.zeros((1, c, box_h, box_w))
        holder[0, :, :, :] = torch.from_numpy(crop)
        templates.append(holder)

    loss_video = torch.tensor(0.0).cuda()
    supervised = mode in ('train', 'val')

    for cur in range(1, num_frames):
        prev = cur - 1

        # Memorize: encode the previous frame with its mask estimate.
        prev_key, prev_value = model([Fs[:, :, prev], Es[:, :, prev]])
        prev_key = prev_key.unsqueeze(2)
        prev_value = prev_value.unsqueeze(2)

        if prev == 0:
            mem_keys, mem_values = prev_key, prev_value
        else:
            mem_keys = torch.cat([keys, prev_key], dim=2)
            mem_values = torch.cat([values, prev_value], dim=2)

        # Segment the current frame.
        logits, p_m2, p_m3 = model(
            [Fs[:, :, cur], templates, mem_keys, mem_values])
        Es[:, 0, cur] = F.softmax(logits, dim=1)[:, 1]

        # Only scheduled frames extend the persistent memory.
        if prev in to_memorize:
            keys, values = mem_keys, mem_values

        if supervised:
            target = Ms[:, 0, cur].cuda()
            loss_video += (_loss(logits, target) +
                           0.5 * _loss(p_m2, target) +
                           0.25 * _loss(p_m3, target))

    pred = torch.round(Es.float().cuda())
    if supervised:
        # Mean over the batch of the per-video mIoU.
        iou_sum = sum(
            get_video_mIoU(pred[n], Ms[n].float().cuda())
            for n in range(len(Ms)))
        video_mIoU = iou_sum / len(Ms)
        return loss_video / num_frames, video_mIoU

    if mode == 'test':
        return pred, Es
Esempio n. 5
0
def Run_video(model,
              Fs,
              Ms,
              num_frames,
              solo_results=None,
              Mem_every=None,
              Mem_number=None,
              mode='train'):
    """Run memory-based propagation guided by external mask proposals.

    For every frame ``t >= 1`` a guidance mask is chosen per sample from
    ``solo_results``: the proposal with the highest IoU against a reference
    mask is used if any proposal reaches an IoU of 0.7, otherwise the
    guidance is all zeros.  The reference is the ground truth of frame ``t``
    in ``'train'`` mode and the rounded estimate of frame ``t - 1`` in every
    other mode.  The guidance is passed (detached) to the model together with
    the current frame and the accumulated keys/values.

    Args:
        model: Callable taking a list of inputs.  With two inputs it must
            return ``(key, value)``; with four inputs it must return
            ``(logits, p_m2, p_m3)``.
        Fs: Frame tensor, unpacked as ``[b, c, t, h, w]``.
        Ms: Mask tensor indexed like ``Fs`` with a single channel.
        num_frames: Number of frames to process.
        solo_results: Optional per-sample proposals.  ``solo_results[b]`` may
            be empty; otherwise ``solo_results[b][t][0]`` is either ``None``
            or a sequence of candidate masks (presumably NumPy arrays, since
            the chosen one goes through ``torch.from_numpy``; confirm against
            the caller).  ``None`` means "no proposals for any sample" and
            yields all-zero guidance.
        Mem_every: If truthy, frames ``0, Mem_every, 2 * Mem_every, ...`` are
            kept permanently in the memory.
        Mem_number: Consulted only when ``Mem_every`` is falsy; selects
            ``Mem_number + 1`` roughly evenly spaced frames starting at 0.
        mode: ``'train'`` / ``'val'`` return ``(loss, mIoU)``; ``'test'``
            returns ``(pred, Es)``; anything else returns ``None``.

    Returns:
        ``(loss / num_frames, video_mIoU)`` for ``'train'`` / ``'val'`` (the
        loss is summed over frames ``1 .. num_frames - 1``), or
        ``(pred, Es)`` for ``'test'`` where ``pred`` is ``Es`` rounded.

    Raises:
        NotImplementedError: If both ``Mem_every`` and ``Mem_number`` are
            falsy.

    Note:
        Working tensors are created with ``.cuda()``, so a CUDA device is
        required.  The time spent selecting guidance masks is printed once
        per frame.
    """
    if Mem_every:
        to_memorize = [
            int(i) for i in np.arange(0, num_frames, step=Mem_every)
        ]
    elif Mem_number:
        to_memorize = [
            int(round(i))
            for i in np.linspace(0, num_frames, num=Mem_number + 2)[:-1]
        ]
    else:
        raise NotImplementedError

    B, _, f, H, W = Fs.shape
    # Foreground estimates laid out as [b, c, t, h, w]; frame 0 is seeded
    # with the provided mask.
    Es = torch.zeros((B, 1, f, H, W)).float().cuda()
    Es[:, :, 0] = Ms[:, :, 0]

    # Running loss; accumulated in place inside the frame loop.
    loss_video = torch.tensor(0.0).cuda()
    supervised = mode in ('train', 'val')

    # Minimum IoU a proposal needs against the reference mask to be used.
    iou_threshold = 0.7

    for t in range(1, num_frames):
        # Memorize: encode the previous frame with its mask estimate.
        pre_key, pre_value = model([Fs[:, :, t - 1], Es[:, :, t - 1]])
        pre_key = pre_key.unsqueeze(2)
        pre_value = pre_value.unsqueeze(2)

        # Pick one guidance mask per sample from the external proposals.
        Sm = torch.zeros_like(Es[:, :, 0])
        st = time.time()
        for b in range(B):
            if mode == 'train':
                gt = Ms[b, :, t]
            else:
                gt = torch.round(Es[b, :, t - 1])

            # Fallback when there is nothing usable: all-zero guidance.
            m_ = torch.zeros_like(gt)

            # ``solo_results`` is optional; without it no sample has
            # proposals (previously this raised TypeError on ``None[b]``).
            solo = solo_results[b] if solo_results is not None else None
            masks = None
            if solo is not None and len(solo) > 0:
                masks = solo[t][0]

            if masks is not None:
                ious = np.array([get_video_mIoU(gt, mask) for mask in masks])
                if np.any(ious >= iou_threshold):
                    best = np.argmax(ious)
                    m_ = torch.from_numpy(masks[best]).cuda()

            # Proposals may come without a channel axis; add one so the mask
            # fits a slot of ``Sm``.
            if len(m_.shape) == 2:
                m_ = m_.unsqueeze(0)
            Sm[b] = m_
        ed = time.time()
        print('Cal IOU time cost: {:.2f}s'.format(ed - st))

        if t - 1 == 0:
            # Nothing stored yet: the memory is just the first frame.
            this_keys_m, this_values_m = pre_key, pre_value
        else:
            this_keys_m = torch.cat([keys, pre_key], dim=2)
            this_values_m = torch.cat([values, pre_value], dim=2)

        # Segment the current frame.
        logits, p_m2, p_m3 = model(
            [Fs[:, :, t], this_keys_m, this_values_m,
             Sm.detach()])
        Es[:, 0, t] = F.softmax(logits, dim=1)[:, 1]

        if supervised:
            Ms_cuda = Ms[:, 0, t].cuda()
            loss_video += (_loss(logits, Ms_cuda) +
                           0.5 * _loss(p_m2, Ms_cuda) +
                           0.25 * _loss(p_m3, Ms_cuda))

        # Only scheduled frames extend the persistent memory.
        if t - 1 in to_memorize:
            keys, values = this_keys_m, this_values_m

    pred = torch.round(Es.float().cuda())
    if supervised:
        # Mean over the batch of the per-video mIoU.
        video_mIoU = 0
        for n in range(len(Ms)):
            video_mIoU = video_mIoU + get_video_mIoU(pred[n], Ms[n].cuda())
        video_mIoU = video_mIoU / len(Ms)

        return loss_video / num_frames, video_mIoU

    if mode == 'test':
        return pred, Es