Example #1
args = {
    'weight_decay': 5e-4,
    'momentum': 0.95,
    'snapshot': '',
    'pretrain': os.path.join(ckpt_path, 'VideoSaliency_2019-12-24 22:05:11', '50000.pth'),
    # 'pretrain': '',
    'imgs_file': 'Pre-train/pretrain_all_seq_DUT_TR_DAFB2_DAVSOD2.txt',
    # 'imgs_file': 'video_saliency/train_all_DAFB2_DAVSOD_5f.txt',
    'train_loader': 'both'
    # 'train_loader': 'video_sequence'
}

imgs_file = os.path.join(datasets_root, args['imgs_file'])
# imgs_file = os.path.join(datasets_root, 'video_saliency/train_all_DAFB3_seq_5f.txt')

joint_transform = joint_transforms.Compose([
    joint_transforms.ImageResize(520),
    joint_transforms.RandomCrop(473),
    joint_transforms.RandomHorizontallyFlip(),
    joint_transforms.RandomRotate(10)
])
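# Note: the joint transforms act on the image and its ground-truth mask as a
# pair, so the random crop/flip/rotation stay spatially aligned; the
# resize-to-520 / crop-to-473 pair matches the (473, 473) input size below.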

# joint_seq_transform = joint_transforms.Compose([
#     joint_transforms.ImageResize(520),
#     joint_transforms.RandomCrop(473)
# ])

input_size = (473, 473)

img_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
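# The Normalize mean/std above are the standard ImageNet channel statistics
# expected by torchvision's pretrained backbones.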
Example #2
args = {
    'imgs_file2': 'Pre-train/pretrain_all_seq_DUT_TR_DAFB2.txt',
    # 'imgs_file': 'video_saliency/train_all_DAFB2_DAVSOD_5f.txt',
    # 'train_loader': 'video_image'
    'train_loader': 'flow_image3',
    # 'train_loader': 'video_sequence'
    'image_size': 430,
    'crop_size': 380,
    'self_distill': 0.1,
    'teacher_distill': 0.6
}
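# 'self_distill' and 'teacher_distill' are presumably scalar weights for the
# self- and teacher-distillation loss terms (their use is not shown here).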

imgs_file = os.path.join(datasets_root, args['imgs_file'])
# imgs_file = os.path.join(datasets_root, 'video_saliency/train_all_DAFB3_seq_5f.txt')

joint_transform = joint_transforms.Compose([
    joint_transforms.ImageResize(args['image_size']),
    joint_transforms.RandomCrop(args['crop_size']),
    # joint_transforms.ColorJitter(hue=[-0.1, 0.1], saturation=0.05),
    joint_transforms.RandomHorizontallyFlip(),
    joint_transforms.RandomRotate(10)
])

# joint_transform = joint_transforms.Compose([
#     joint_transforms.ImageResize(290),
#     joint_transforms.RandomCrop(256),
#     joint_transforms.RandomHorizontallyFlip(),
#     joint_transforms.RandomRotate(10)
# ])

# joint_seq_transform = joint_transforms.Compose([
#     joint_transforms.ImageResize(520),
#     joint_transforms.RandomCrop(473)
# ])
Example #3
def train_online(net, seq_name='breakdance'):
    online_args = {
        'iter_num': 100,
        'train_batch_size': 5,
        'lr': 1e-8,
        'lr_decay': 0.95,
        'weight_decay': 5e-4,
        'momentum': 0.95,
    }
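    # OSVOS-style online adaptation: a few iterations at a tiny learning rate,
    # fine-tuning on the first (annotated) frame of `seq_name` only.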

    joint_transform = joint_transforms.Compose([
        joint_transforms.ImageResize(473),
        # joint_transforms.RandomCrop(473),
        # joint_transforms.RandomHorizontallyFlip(),
        # joint_transforms.RandomRotate(10)
    ])
    target_transform = transforms.ToTensor()
    train_set = VideoFirstImageFolder(to_test['davis'], gt_root, seq_name,
                                      online_args['train_batch_size'],
                                      joint_transform, img_transform,
                                      target_transform)
    online_train_loader = DataLoader(
        train_set,
        batch_size=online_args['train_batch_size'],
        num_workers=1,
        shuffle=False)
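    # shuffle=False keeps the clip's frames in temporal order, which the
    # shifted labels.narrow(...) supervision in the loop below assumes.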

    optimizer = optim.SGD([
        {
            'params': [param for name, param in net.named_parameters()
                       if name[-4:] == 'bias'],
            'lr': 2 * online_args['lr']
        },
        {
            'params': [param for name, param in net.named_parameters()
                       if name[-4:] != 'bias'],
            'lr': online_args['lr'],
            'weight_decay': online_args['weight_decay']
        }
    ], momentum=online_args['momentum'])
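    # Caffe-style convention: bias parameters get twice the base learning rate
    # and are excluded from weight decay.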

    criterion = nn.BCEWithLogitsLoss().cuda()
    net.train().cuda()
    fix_parameters(net.named_parameters())
    for curr_iter in range(0, online_args['iter_num']):
        total_loss_record, loss0_record, loss1_record = AvgMeter(), AvgMeter(), AvgMeter()
        loss2_record, loss3_record, loss4_record = AvgMeter(), AvgMeter(), AvgMeter()

        for i, data in enumerate(online_train_loader):
            optimizer.param_groups[0]['lr'] = 2 * online_args['lr'] * (
                1 - float(curr_iter) / online_args['iter_num']) ** online_args['lr_decay']
            optimizer.param_groups[1]['lr'] = online_args['lr'] * (
                1 - float(curr_iter) / online_args['iter_num']) ** online_args['lr_decay']
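            # Polynomial ("poly") learning-rate decay over the outer iteration
            # count, with the bias group held at twice the weight group's rate.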
            inputs, labels = data
            batch_size = inputs.size(0)
            inputs = Variable(inputs).cuda()
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            outputs0, outputs1, outputs2, outputs3, outputs4 = net(inputs)
            loss0 = criterion(outputs0, labels)
            loss1 = criterion(outputs1, labels.narrow(0, 1, 4))
            loss2 = criterion(outputs2, labels.narrow(0, 2, 3))
            loss3 = criterion(outputs3, labels.narrow(0, 3, 2))
            loss4 = criterion(outputs4, labels.narrow(0, 4, 1))
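            # Deep supervision over the 5-frame clip: each head's output batch
            # shrinks by one frame, so labels.narrow(0, k, 5 - k) drops the
            # first k targets to match (this assumes a full batch of 5).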

            total_loss = loss0 + loss1 + loss2 + loss3 + loss4
            total_loss.backward()
            optimizer.step()

            total_loss_record.update(total_loss.data, batch_size)
            loss0_record.update(loss0.data, batch_size)
            loss1_record.update(loss1.data, batch_size)
            loss2_record.update(loss2.data, batch_size)
            loss3_record.update(loss3.data, batch_size)
            loss4_record.update(loss4.data, batch_size)

            log = '[iter %d], [total loss %.5f], [loss0 %.5f], [loss1 %.5f], [loss2 %.5f], [loss3 %.5f], ' \
                  '[loss4 %.5f], [lr %.13f]' % \
                  (curr_iter, total_loss_record.avg, loss0_record.avg, loss1_record.avg, loss2_record.avg,
                   loss3_record.avg, loss4_record.avg, optimizer.param_groups[1]['lr'])
            print(log)

    return net
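
# Usage sketch (hypothetical -- the checkpoint name and driver loop are
# assumptions, not part of this snippet):
#     net.load_state_dict(torch.load(os.path.join(ckpt_path, exp_name, '50000.pth')))
#     net = train_online(net, seq_name='breakdance')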
Example #4
def train_online(net, seq_name='breakdance'):
    online_args = {
        'iter_num': 100,
        'train_batch_size': 1,
        'lr': 1e-10,
        'lr_decay': 0.95,
        'weight_decay': 5e-4,
        'momentum': 0.95,
    }

    joint_transform = joint_transforms.Compose([
        joint_transforms.ImageResize(380),
        # joint_transforms.RandomCrop(473),
        # joint_transforms.RandomHorizontallyFlip(),
        # joint_transforms.RandomRotate(10)
    ])
    target_transform = transforms.ToTensor()
    # train_set = VideoFSImageFolder(to_test['davis'], seq_name, use_first=True, joint_transform=joint_transform, transform=img_transform)
    train_set = VideoFirstImageFolder(to_test['davis'],
                                      gt_root,
                                      seq_name,
                                      joint_transform=joint_transform,
                                      transform=img_transform,
                                      target_transform=target_transform)
    online_train_loader = DataLoader(
        train_set,
        batch_size=online_args['train_batch_size'],
        num_workers=1,
        shuffle=False)

    # criterion = nn.MSELoss().cuda()
    criterion = nn.BCEWithLogitsLoss().cuda()
    erosion = Erosion2d(1, 1, 5, soft_max=False).cuda()
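    # Erosion2d(1, 1, 5) is presumably (in_channels, out_channels, kernel_size):
    # a differentiable morphological erosion handed to the per-model loss
    # helpers below; its exact role is an assumption from the call sites.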
    net.train()
    net.cuda()
    # fix_parameters(net.named_parameters())

    optimizer = optim.SGD([
        {
            'params': [param for name, param in net.named_parameters()
                       if name[-4:] == 'bias'],
            'lr': 2 * online_args['lr']
        },
        {
            'params': [param for name, param in net.named_parameters()
                       if name[-4:] != 'bias'],
            'lr': online_args['lr'],
            'weight_decay': online_args['weight_decay']
        }
    ], momentum=online_args['momentum'])

    for curr_iter in range(0, online_args['iter_num']):
        total_loss_record, loss0_record, loss1_record = AvgMeter(), AvgMeter(), AvgMeter()
        loss2_record = AvgMeter()

        for i, data in enumerate(online_train_loader):
            optimizer.param_groups[0]['lr'] = 2 * online_args['lr'] * (
                1 - float(curr_iter) / online_args['iter_num']) ** online_args['lr_decay']
            optimizer.param_groups[1]['lr'] = online_args['lr'] * (
                1 - float(curr_iter) / online_args['iter_num']) ** online_args['lr_decay']
            inputs, labels = data
            batch_size = inputs.size(0)
            inputs = Variable(inputs).cuda()
            labels = Variable(labels).cuda()

            optimizer.zero_grad()
            if args['model'] == 'BASNet':
                total_loss, loss0, loss1, loss2 = train_BASNet(
                    net, inputs, criterion, erosion, labels)
            elif args['model'] == 'R3Net':
                total_loss, loss0, loss1, loss2 = train_R3Net(
                    net, inputs, criterion, erosion, labels)
            elif args['model'] == 'DSSNet':
                total_loss, loss0, loss1, loss2 = train_DSSNet(
                    net, inputs, criterion, erosion, labels)
            elif args['model'] == 'CPD':
                total_loss, loss0, loss1, loss2 = train_CPD(
                    net, inputs, criterion, erosion, labels)
            elif args['model'] == 'RAS':
                total_loss, loss0, loss1, loss2 = train_RAS(
                    net, inputs, criterion, erosion, labels)
            elif args['model'] == 'PoolNet':
                total_loss, loss0, loss1, loss2 = train_PoolNet(
                    net, inputs, criterion, erosion, labels)
            elif args['model'] == 'F3Net':
                total_loss, loss0, loss1, loss2 = train_F3Net(
                    net, inputs, criterion, erosion, labels)
            elif args['model'] == 'R2Net':
                total_loss, loss0, loss1, loss2 = train_R2Net(
                    net, inputs, criterion, erosion, labels)
            else:
                # Fail fast instead of hitting a NameError on total_loss below.
                raise ValueError('unknown model: %r' % args['model'])
            total_loss.backward()
            optimizer.step()

            total_loss_record.update(total_loss.data, batch_size)
            loss0_record.update(loss0.data, batch_size)
            loss1_record.update(loss1.data, batch_size)
            loss2_record.update(loss2.data, batch_size)
            # loss3_record.update(loss3.data, batch_size)
            # loss4_record.update(loss4.data, batch_size)

            log = '[iter %d], [total loss %.5f], [loss0 %.8f], [loss1 %.8f], [loss2 %.8f], [lr %.13f]' % \
                  (curr_iter, total_loss_record.avg, loss0_record.avg, loss1_record.avg, loss2_record.avg,
                   optimizer.param_groups[1]['lr'])
            print(log)

    print('taking snapshot ...')
    torch.save(
        net.state_dict(),
        os.path.join(ckpt_path, exp_name,
                     str(args['snapshot']) + '_' + seq_name + '_online.pth'))
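    # Weights are snapshotted per sequence, e.g. '<snapshot>_breakdance_online.pth'.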
    # torch.save(optimizer.state_dict(),
    #            os.path.join(ckpt_path, exp_name, '%d_optim.pth' % curr_iter))

    return net
Example #5
        img, gt = flip(img, gt, flip_p)
        img, gt = rotate(img, gt, degree_random)
        return img, gt

    def __len__(self):
        return len(self.imgs)

if __name__ == '__main__':
    from torchvision import transforms

    import joint_transforms
    from torch.utils.data import DataLoader
    from config import msra10k_path, video_seq_path, video_seq_gt_path, video_train_path
    import numpy as np
    joint_transform = joint_transforms.Compose([
        joint_transforms.ImageResize(250),
        joint_transforms.RandomCrop(200),
        # joint_transforms.ColorJitter(hue=[-0.1, 0.1], saturation=0.05),
        joint_transforms.RandomHorizontallyFlip(),
        joint_transforms.RandomRotate(10)
    ])

    joint_seq_transform = joint_transforms.Compose([
        joint_transforms.ImageResize(250),
        joint_transforms.RandomCrop(200)
    ])

    img_transform = transforms.Compose([
        # transforms.ColorJitter(hue=[-0.1, 0.1]),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
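
    # Hypothetical smoke test (an assumption, not in the original source):
    # push a dummy image/mask pair through the paired transforms and check
    # the resulting tensor shapes.
    from PIL import Image
    img = Image.new('RGB', (320, 240))
    gt = Image.new('L', (320, 240))
    img, gt = joint_transform(img, gt)
    img_t = img_transform(img)  # expected: 3 x 200 x 200 float tensor
    gt_t = transforms.ToTensor()(gt)  # expected: 1 x 200 x 200 mask tensor
    print(img_t.shape, gt_t.shape)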