예제 #1
0
    def __init__(self, args, img_paths, labels, mode=None):
        """Hold image paths/labels and build the mode-specific transform pipeline.

        Args:
            args: config namespace; only ``args.input_size`` is read here.
            img_paths: sequence of image file paths.
            labels: labels aligned with ``img_paths``.
            mode: either ``'train'`` or ``'val'`` (required).

        Raises:
            ValueError: if ``mode`` is not ``'train'`` or ``'val'``.
        """
        # Explicit raise instead of ``assert``: asserts are stripped under
        # ``python -O``, silently disabling this validation.
        if mode not in ('train', 'val'):
            raise ValueError("mode must be 'train' or 'val', got %r" % (mode,))
        self.args = args
        self.img_size = args.input_size
        self.img_paths = img_paths
        self.labels = labels
        self.mode = mode
        self.transform = None
        self.interval_rate = 0.1

        # NOTE: any change here must be mirrored in custom_service.py.
        if self.mode == 'train':
            self.transform = transforms.Compose([
                # tr.CenterCrop(self.img_size, self.interval_rate), # 0.1, random = 0.5
                tr.MaxResize(self.img_size, mode='train'),  # random crop, rate=1.2
                tr.RandomHorizontalFlip(),  # random horizontal flip, p=0.5
                # tr.RandomGaussianBlur(), # random Gaussian blur, p=0.5
                # tr.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
                tr.ToTensor(),  # to tensor
            ])
        elif self.mode == 'val':
            self.transform = transforms.Compose([
                tr.MaxResize(self.img_size, mode='val'),
                # tr.Normalize(mean=(0.485,0.456,0.406), std=(0.229,0.224,0.225)),
                tr.ToTensor(),  # to tensor
            ])
예제 #2
0
def train():
    """Train Dilated_Net on DAVIS-2016, validating every 5 epochs and
    saving a checkpoint whenever the validation IoU improves."""
    model = Dilated_Net().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = t.optim.Adam(model.parameters(), lr=1e-5, weight_decay=5e-4)
    frame = MyFrame(model, criterion, K)

    transform = transforms.Compose([tr.RandomHorizontalFlip(), tr.ToTensor()])
    dataset = DAVIS2016(db_root_dir='/root/DAVIS-2016',
                        train=True,
                        transform=transform)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=1)

    loss_meter = meter.AverageValueMeter()
    iou_meter = meter.AverageValueMeter()
    max_iou = 0

    for epoch in range(1000):
        loss_meter.reset()
        iou_meter.reset()
        for step, sample in enumerate(dataloader):
            img_1 = sample['image_1'].to(device)
            img_t = sample['image_t'].to(device)
            gt_1 = sample['gt_1'].to(device)
            gt_t = sample['gt_t'].to(device)
            # Skip pairs whose reference mask is empty.
            if gt_1.sum() == 0:
                continue

            frame.set_data(img_1, img_t, gt_1, gt_t)
            optimizer.zero_grad()
            loss, IoU, pred = frame.forward()
            loss.backward()
            optimizer.step()

            # Show the current prediction overlaid on the target frame.
            overlay = overlay_mask(im_normalize(tens2image(img_t.cpu())),
                                   tens2image(pred.cpu()))
            vis.img(name='train', img_=overlay.transpose(2, 0, 1))

            loss_meter.add(loss.item())
            iou_meter.add(IoU.item())

        msg = "train epoch:{}, loss:{} IoU:{}".format(epoch,
                                                      loss_meter.value()[0],
                                                      iou_meter.value()[0])
        print(msg)
        vis.log(msg, 'loss_iou')
        vis.plot('train_loss', loss_meter.value()[0])
        vis.plot('train_iou', iou_meter.value()[0])

        # Every 5 epochs: evaluate and keep the best checkpoint by IoU.
        if (epoch + 1) % 5 == 0:
            with torch.no_grad():
                iou_val = evaluation(frame=frame)
            if max_iou < iou_val:
                max_iou = iou_val
                frame.save_net('train11_28.pth')
    def prepare_dataset(self):
        """Build train/val datasets, samplers, and dataloaders from ``self.cfg``.

        Populates ``self.train_sampler``, ``self.val_sampler``,
        ``self.trainloader`` and ``self.valloader``. Exits the process with a
        non-zero status when no training dataset is configured.
        """
        cfg = self.cfg
        self.print_log('Process dataset...')

        # Training-time augmentation pipeline.
        composed_transforms = transforms.Compose([
            tr.RandomScale(cfg.DATA_MIN_SCALE_FACTOR,
                           cfg.DATA_MAX_SCALE_FACTOR, cfg.DATA_SHORT_EDGE_LEN),
            tr.RandomCrop(cfg.DATA_RANDOMCROP),
            tr.RandomHorizontalFlip(cfg.DATA_RANDOMFLIP),
            tr.Resize(cfg.DATA_RANDOMCROP),
            tr.ToTensor()
        ])

        train_datasets = []
        if 'vecolor' in cfg.DATASETS:
            train_vecolor_dataset = VECOLOR(root=cfg.DIR_VECOLOR,
                                            phase='train',
                                            transform=composed_transforms)
            train_datasets.append(train_vecolor_dataset)

        # Validation uses deterministic transforms only.
        val_transforms = transforms.Compose(
            [tr.Resize(cfg.DATA_RANDOMCROP),
             tr.ToTensor()])
        val_vecolor_dataset = VECOLOR(root=cfg.DIR_VECOLOR,
                                      phase='val',
                                      transform=val_transforms)

        if len(train_datasets) > 1:
            train_dataset = torch.utils.data.ConcatDataset(train_datasets)
        elif len(train_datasets) == 1:
            train_dataset = train_datasets[0]
        else:
            self.print_log('No dataset!')
            # Exit with a non-zero status: a missing dataset is a failure,
            # not a successful run (was ``exit(0)``).
            exit(1)

        val_dataset = val_vecolor_dataset

        if cfg.DIST_ENABLE:
            self.train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset)
            self.val_sampler = torch.utils.data.distributed.DistributedSampler(
                val_dataset)
        else:
            # Single-process fallback: degenerate sampler over one replica.
            self.train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset, num_replicas=1, rank=0)
            self.val_sampler = torch.utils.data.distributed.DistributedSampler(
                val_dataset, num_replicas=1, rank=0)

        # Integer division: DataLoader requires an int batch size anyway,
        # so ``//`` is clearer than ``int(a / b)`` (and avoids float rounding).
        self.trainloader = DataLoader(train_dataset,
                                      batch_size=cfg.TRAIN_BATCH_SIZE //
                                      cfg.TRAIN_GPUS,
                                      shuffle=False,
                                      num_workers=cfg.DATA_WORKERS,
                                      pin_memory=True,
                                      sampler=self.train_sampler)
        self.valloader = DataLoader(val_dataset,
                                    batch_size=1,
                                    shuffle=False,
                                    num_workers=0,
                                    pin_memory=True,
                                    sampler=self.val_sampler)

        self.print_log('Done!')
예제 #4
0
        gt_1 = data['gt_1'].to(device)
        gt_t = data['gt_t'].to(device)
        if (gt_1.sum() == 0):
            continue
        frame.set_data(img_1, img_t, gt_1, gt_t)
        loss_val, IoU_val = frame.forward()
        loss_meter_val.add(loss_val.item())
        iou_meter_val.add(IoU_val.item())
    return loss_meter_val.value()[0], iou_meter_val.value()[0]


# Script setup for the single-sequence overfitting test.
model = Dilated_Net().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = t.optim.Adam(model.parameters(), lr=1e-5, weight_decay=5e-4)
frame = MyFrame(model, criterion, K)
# Use a distinct name: assigning the result to ``transforms`` would shadow
# the torchvision ``transforms`` module and break any later module access.
transform = transforms.Compose([tr.RandomHorizontalFlip(), tr.ToTensor()])
dataset = DAVIS_OVER_FIT_TEST1(db_root_dir='/root/DAVIS-2016',
                               train=True,
                               transform=transform)
dataloader = DataLoader(dataset, batch_size=1, shuffle=True, num_workers=1)
loss_meter = meter.AverageValueMeter()
iou_meter = meter.AverageValueMeter()
max_iou = 0
for i in range(1000):
    loss_meter.reset()
    iou_meter.reset()
    for ii, data in enumerate(dataloader):
        img_1 = data['image_1'].to(device)
        img_t = data['image_t'].to(device)
        gt_1 = data['gt_1'].to(device)
        gt_t = data['gt_t'].to(device)
예제 #5
0
        exclude_db = [str(db) for db in self.excluded]
        return 'Included datasets:' + str(
            include_db) + '\n' + 'Excluded datasets:' + str(exclude_db)


if __name__ == "__main__":
    import matplotlib.pyplot as plt
    from dataloaders import pascal
    from dataloaders import sbd
    import torch
    import numpy as np
    import dataset.custom_transforms as tr
    from torchvision import transforms

    composed_transforms_tr = transforms.Compose([
        tr.RandomHorizontalFlip(),
        tr.ScaleNRotate(rots=(-15, 15), scales=(.75, 1.25)),
        tr.FixedResize(resolutions={
            'image': (450, 450),
            'gt': (450, 450)
        }),
        tr.DistanceMap(v=0.15, elem='gt'),
        tr.ConcatInputs(elems=('image', 'distance_map')),
        tr.ToTensor()
    ])

    composed_transforms_ts = transforms.Compose([
        tr.FixedResize(resolutions={
            'image': (450, 450),
            'gt': (450, 450)
        }),
예제 #6
0
def evaluation(frame=None):
    """Evaluate a model on the DAVIS-2016 validation sequences.

    Args:
        frame: a MyFrame wrapping the model to evaluate. If None, a
            Dilated_Net checkpoint is loaded from disk and wrapped here.

    Returns:
        The IoU averaged over all validation samples.
    """
    with torch.no_grad():
        if frame is None:
            from models.dilated_net import Dilated_Net
            model = Dilated_Net(pretrained=False)
            model.load_state_dict(
                t.load(
                    "/root/PycharmProjects/VideoMatch/checkpoint/train.pth"))
            model = model.to(device)
            criterion = nn.CrossEntropyLoss()
            frame = MyFrame(model, criterion, K)

        loss_meter_val = meter.AverageValueMeter()
        iou_meter_val = meter.AverageValueMeter()

        db_root_dir = '/root/DAVIS-2016'
        fname = 'val_seqs'
        with open(os.path.join(db_root_dir, fname + '.txt')) as f:
            seqs = f.readlines()
            val_files_len = len(seqs)

        # Build the transform once (it is loop-invariant). No
        # RandomHorizontalFlip here: the original flipped inputs with p=0.5
        # during *evaluation*, which made the reported IoU non-deterministic
        # and checkpoint selection noisy.
        transform = transforms.Compose([tr.ToTensor()])

        for val_index in range(val_files_len):
            dataset_val = DAVIS2016(db_root_dir='/root/DAVIS-2016',
                                    train=False,
                                    transform=transform,
                                    val_index=val_index)
            # shuffle=False: evaluation needs no randomness, and a fixed
            # order keeps the visualization stream reproducible.
            dataloader_val = DataLoader(dataset_val,
                                        batch_size=1,
                                        shuffle=False,
                                        num_workers=1)
            for ii, data in enumerate(dataloader_val):
                img_1 = data['image_1'].to(device)
                img_t = data['image_t'].to(device)
                gt_1 = data['gt_1'].to(device)
                gt_t = data['gt_t'].to(device)
                # Skip pairs whose reference mask is empty.
                if gt_1.sum() == 0:
                    continue
                frame.set_data(img_1, img_t, gt_1, gt_t)
                loss_val, iou_val, pred = frame.forward()
                vis.img(name='val',
                        img_=overlay_mask(
                            im_normalize(tens2image(img_t.cpu())),
                            tens2image(pred.cpu())).transpose(2, 0, 1))
                loss_meter_val.add(loss_val.item())
                iou_meter_val.add(iou_val.item())

            # NOTE: the meters are never reset per sequence, so this prints
            # the running average over all sequences processed so far.
            print("evaluation on sequence:{}, loss: {}, IoU: {}".format(
                seqs[val_index],
                loss_meter_val.value()[0],
                iou_meter_val.value()[0]))
            vis.log(
                "evaluation on sequence:{}, loss: {}, IoU: {}".format(
                    seqs[val_index],
                    loss_meter_val.value()[0],
                    iou_meter_val.value()[0]), 'loss_iou')

        vis.plot('val_loss', loss_meter_val.value()[0])
        vis.plot('val_iou', iou_meter_val.value()[0])
        vis.log("avg of all sequences:{}".format(iou_meter_val.value()[0]),
                'loss_iou')
        # ``frame`` is always bound by this point (assigned above when None),
        # so return unconditionally — the original ``if frame is not None``
        # guard was dead code that could silently return None if removed paths
        # ever changed.
        return iou_meter_val.value()[0]