Example #1
    def train_transform(self, rgb, depth):
        s = np.random.uniform(1.0, 1.5)  # random scaling
        depth_np = depth / s
        angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
        do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

        # perform 1st step of data augmentation
        transform = transforms.Compose(
            [
                transforms.Resize(
                    250.0 / iheight
                ),  # this is for computational efficiency, since rotation can be slow
                transforms.Rotate(angle),
                transforms.Resize(s),
                transforms.CenterCrop((228, 304)),
                transforms.HorizontalFlip(do_flip),
                transforms.Resize(self.output_size),
            ]
        )
        rgb_np = transform(rgb)
        rgb_np = self.color_jitter(rgb_np)  # random color jittering
        rgb_np = np.asfarray(rgb_np, dtype="float") / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np
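A minimal usage sketch (not from the original repository) of how train_transform might be wired into a PyTorch Dataset. Here load_rgb_depth is a hypothetical loader returning an HxWx3 uint8 RGB array and an HxW float depth array, and iheight is assumed to be the raw frame height used by the Resize factor above.

import numpy as np
import torch
from torch.utils.data import Dataset

class RGBDDataset(Dataset):
    """Hypothetical Dataset showing where train_transform would be called."""

    def __init__(self, paths, train_transform):
        self.paths = paths
        # the method shown above; in the original code it is bound to the dataset class
        self.train_transform = train_transform

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, index):
        rgb, depth = load_rgb_depth(self.paths[index])  # hypothetical loader
        rgb_np, depth_np = self.train_transform(rgb, depth)
        # HWC -> CHW; copy to guarantee contiguous memory after flips/rotations
        rgb_t = torch.from_numpy(np.ascontiguousarray(rgb_np.transpose(2, 0, 1))).float()
        depth_t = torch.from_numpy(np.ascontiguousarray(depth_np)).unsqueeze(0).float()
        return rgb_t, depth_t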
Example #2
    def val_transform(self, rgb, depth):
        depth_np = depth
        transform = transforms.Compose(
            [
                transforms.Resize(250.0 / iheight),
                transforms.CenterCrop((228, 304)),
                transforms.Resize(self.output_size),
            ]
        )
        rgb_np = transform(rgb)
        rgb_np = np.asfarray(rgb_np, dtype="float") / 255
        depth_np = transform(depth_np)

        return rgb_np, depth_np
Example #3
 def get(cls, args):
     normalize = arraytransforms.Normalize(mean=[0.502], std=[1.0])
     train_dataset = cls(args.data,
                         'train',
                         args.train_file,
                         args.cache,
                         transform=transforms.Compose([
                             arraytransforms.RandomResizedCrop(224),
                             arraytransforms.ToTensor(),
                             normalize,
                             transforms.Lambda(torch.cat),
                         ]))
     val_transforms = transforms.Compose([
         arraytransforms.Resize(256),
         arraytransforms.CenterCrop(224),
         arraytransforms.ToTensor(),
         normalize,
         transforms.Lambda(torch.cat),
     ])
     val_dataset = cls(args.data,
                       'val',
                       args.val_file,
                       args.cache,
                       transform=val_transforms)
     valvideo_dataset = cls(args.data,
                            'val_video',
                            args.val_file,
                            args.cache,
                            transform=val_transforms)
     return train_dataset, val_dataset, valvideo_dataset
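A minimal usage sketch, assuming args carries the data, train_file, val_file, cache, batch_size, and workers attributes that the original training script parses, and that DatasetClass is the class this classmethod is defined on.

import torch

train_dataset, val_dataset, valvideo_dataset = DatasetClass.get(args)

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=args.workers,
                                           pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         num_workers=args.workers,
                                         pin_memory=True)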
Example #4
def main():
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.arch == 'alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 227
    else:
        raise Exception('Model not supported yet')

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.Adam(model.parameters(),
                                 args.lr,
                                 weight_decay=args.weight_decay)

    for m in model.modules():
        if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 1.0 / c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code

    if args.caffe_data:
        print('==> Using Caffe Dataset')
        cwd = os.getcwd()
        sys.path.append(cwd + '/../')
        import datasets as datasets
        import datasets.transforms as transforms
        if not os.path.exists(args.data + '/imagenet_mean.binaryproto'):
            print("==> Data directory" + args.data + "does not exits")
            print("==> Please specify the correct data path by")
            print("==>     --data <DATA_PATH>")
            return

        normalize = transforms.Normalize(meanfile=args.data +
                                         '/imagenet_mean.binaryproto')

        train_dataset = datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
                transforms.RandomSizedCrop(input_size),
            ]),
            Train=True)

        train_sampler = None

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=False,
                                                   num_workers=args.workers,
                                                   pin_memory=True,
                                                   sampler=train_sampler)

        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.ToTensor(),
                normalize,
                transforms.CenterCrop(input_size),
            ]),
            Train=False),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)
    else:
        print('==> Using Pytorch Dataset')
        import torchvision
        import torchvision.transforms as transforms
        import torchvision.datasets as datasets
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

        torchvision.set_image_backend('accimage')

        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(input_size, scale=(0.40, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))

        train_loader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=args.batch_size,
                                                   shuffle=True,
                                                   num_workers=args.workers,
                                                   pin_memory=True)
        val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
            valdir,
            transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(input_size),
                transforms.ToTensor(),
                normalize,
            ])),
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

    print(model)

    # define the binarization operator
    global bin_op
    bin_op = util.BinOp(model)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
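The helpers called above (save_checkpoint, adjust_learning_rate) are not part of this example. A common implementation pattern looks like the sketch below; the step size and decay factor are assumptions, not necessarily the original values.

import shutil
import torch

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    # always persist the latest state; keep a separate copy of the best one
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')

def adjust_learning_rate(optimizer, epoch, base_lr=1e-3, step=30):
    # step decay: multiply the learning rate by 0.1 every `step` epochs
    lr = base_lr * (0.1 ** (epoch // step))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr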
Example #5
def make_mot_transforms(image_set):

    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    scale = scales[1]

    if image_set == 'train':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.Compose([
                T.Resize(1000),
                T.SizeCrop_MOT(1000),
                T.Resize(scale),
            ]),
            normalize,
        ])
        '''
        return T.Compose([
            
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([800, 1000, 1200]),
#                     T.RandomSizeCrop(384, 600),
                    T.RandomSizeCrop_MOT(800, 1200),
                    T.RandomResize(scales, max_size=1333),
                ])
            ),
            
            normalize,
        ])
        '''
    if image_set == 'trainall':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([800, 1000, 1200]),
                    #                     T.RandomSizeCrop(384, 600),
                    T.RandomSizeCrop_MOT(800, 1200),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ])

    if image_set == 'val':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.Compose([
                T.Resize(1000),
                T.SizeCrop_MOT(1000),
                T.Resize(scale),
            ]),
            normalize,
        ])
        '''
         return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])
         '''

    if image_set == 'test':
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])
        '''
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.Compose([
                    T.Resize(1000),
                    T.SizeCrop_MOT(1000),
                    T.Resize(scale),
                ]),
            normalize,
        ])
        '''

    raise ValueError(f'unknown {image_set}')
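A minimal usage sketch; MOTDataset and the folder/annotation paths are hypothetical placeholders for a detection dataset whose __getitem__ applies the returned transform to (image, target) pairs, which is what these detection-style transforms expect.

train_transforms = make_mot_transforms('train')
val_transforms = make_mot_transforms('val')

train_dataset = MOTDataset(train_img_folder, train_ann_file, transforms=train_transforms)
val_dataset = MOTDataset(val_img_folder, val_ann_file, transforms=val_transforms)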
def fetch_dataset(data_name, normalize=True, branch=None):
    # `normalize` and `branch` are used throughout this function; in the original
    # project they come from module-level configuration, so they are surfaced
    # here as parameters to keep the example self-contained.
    print('fetching data {}...'.format(data_name))
    if (data_name == 'MNIST'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        train_dataset = datasets.MNIST(root=train_dir,
                                       train=True,
                                       download=True,
                                       transform=transforms.ToTensor())
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
            test_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.MNIST(root=test_dir,
                                      train=False,
                                      download=True,
                                      transform=test_transform)

    elif (data_name == 'EMNIST' or data_name == 'EMNIST_byclass'
          or data_name == 'EMNIST_bymerge' or data_name == 'EMNIST_balanced'
          or data_name == 'EMNIST_letters' or data_name == 'EMNIST_digits'
          or data_name == 'EMNIST_mnist'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/test'.format(data_name.split('_')[0])
        transform = transforms.Compose([transforms.ToTensor()])
        split = 'balanced' if len(
            data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.EMNIST(root=train_dir,
                                        split=split,
                                        branch=branch,
                                        train=True,
                                        download=True,
                                        transform=transform)
        test_dataset = datasets.EMNIST(root=test_dir,
                                       split=split,
                                       branch=branch,
                                       train=False,
                                       download=True,
                                       transform=transform)

    elif (data_name == 'FashionMNIST'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        transform = transforms.Compose([transforms.ToTensor()])
        train_dataset = datasets.FashionMNIST(root=train_dir,
                                              train=True,
                                              download=True,
                                              transform=transform)
        test_dataset = datasets.FashionMNIST(root=test_dir,
                                             train=False,
                                             download=True,
                                             transform=transform)

    elif (data_name == 'CIFAR10'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR10(train_dir,
                                         train=True,
                                         transform=transforms.ToTensor(),
                                         download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR10(test_dir,
                                        train=False,
                                        transform=test_transform,
                                        download=True)

    elif (data_name == 'CIFAR100'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR100(train_dir,
                                          branch=branch,
                                          train=True,
                                          transform=transforms.ToTensor(),
                                          download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR100(test_dir,
                                         branch=branch,
                                         train=False,
                                         transform=test_transform,
                                         download=True)

    elif (data_name == 'SVHN'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.SVHN(train_dir,
                                      split='train',
                                      transform=transforms.ToTensor(),
                                      download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
            test_transform = transforms.Compose(
                [transforms.ToTensor(),
                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.SVHN(test_dir,
                                     split='test',
                                     transform=test_transform,
                                     download=True)

    elif (data_name == 'ImageNet'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.ImageFolder(train_dir,
                                             transform=transforms.ToTensor())
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose(
                [transforms.Resize((224, 224)),
                 transforms.ToTensor()])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 224)),
                 transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)

    elif (data_name == 'CUB2011'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        train_dataset = datasets.CUB2011(train_dir,
                                         transform=transforms.Compose([
                                             transforms.Resize((224, 224)),
                                             transforms.ToTensor()
                                         ]),
                                         download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose(
                [transforms.Resize((224, 224)),
                 transforms.ToTensor()])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 224)),
                 transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CUB2011(test_dir,
                                        transform=test_transform,
                                        download=True)

    elif (data_name == 'WheatImage' or data_name == 'WheatImage_binary'
          or data_name == 'WheatImage_six'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        label_mode = 'six' if len(
            data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.WheatImage(train_dir,
                                            label_mode=label_mode,
                                            transform=transforms.Compose([
                                                transforms.Resize((224, 288)),
                                                transforms.ToTensor()
                                            ]))
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 288)),
                 transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.WheatImage(test_dir,
                                           label_mode=label_mode,
                                           transform=test_transform)

    elif (data_name == 'CocoDetection'):
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/instances_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/instances_val2017.json'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)),
             transforms.ToTensor()])
        train_dataset = datasets.CocoDetection(train_dir,
                                               train_ann,
                                               transform=transform)
        test_dataset = datasets.CocoDetection(test_dir,
                                              test_ann,
                                              transform=transform)

    elif (data_name == 'CocoCaptions'):
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/captions_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/captions_val2017.json'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)),
             transforms.ToTensor()])
        train_dataset = datasets.CocoCaptions(train_dir,
                                              train_ann,
                                              transform=transform)
        test_dataset = datasets.CocoCaptions(test_dir,
                                             test_ann,
                                             transform=transform)

    elif (data_name == 'VOCDetection'):
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)),
             transforms.ToTensor()])
        train_dataset = datasets.VOCDetection(train_dir,
                                              'trainval',
                                              transform=transform)
        test_dataset = datasets.VOCDetection(test_dir,
                                             'test',
                                             transform=transform)

    elif (data_name == 'VOCSegmentation'):
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)),
             transforms.ToTensor()])
        train_dataset = datasets.VOCSegmentation(train_dir,
                                                 'trainval',
                                                 transform=transform)
        test_dataset = datasets.VOCSegmentation(test_dir,
                                                'test',
                                                transform=transform)

    elif (data_name == 'MOSI' or data_name == 'MOSI_binary'
          or data_name == 'MOSI_five' or data_name == 'MOSI_seven'
          or data_name == 'MOSI_regression'):
        train_dir = './data/{}'.format(data_name.split('_')[0])
        test_dir = './data/{}'.format(data_name.split('_')[0])
        label_mode = 'five' if len(
            data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.MOSI(train_dir,
                                      split='trainval',
                                      label_mode=label_mode,
                                      download=True)
        stats = make_stats(train_dataset, batch_size=1)
        train_transform = transforms.Compose([transforms.Normalize(stats)])
        test_transform = transforms.Compose([transforms.Normalize(stats)])
        train_dataset.transform = train_transform
        test_dataset = datasets.MOSI(test_dir,
                                     split='test',
                                     label_mode=label_mode,
                                     download=True,
                                     transform=test_transform)

    elif (data_name == 'Kodak'):
        train_dataset = None
        transform = transforms.Compose([transforms.ToTensor()])
        test_dir = './data/{}'.format(data_name)
        train_dataset = datasets.ImageFolder(test_dir, transform)
        test_dataset = datasets.ImageFolder(test_dir, transform)

    elif (data_name == 'UCID'):
        train_dataset = None
        transform = transforms.Compose([transforms.ToTensor()])
        test_dir = './data/{}'.format(data_name)
        train_dataset = datasets.ImageFolder(test_dir, transform)
        test_dataset = datasets.ImageFolder(test_dir, transform)
    else:
        raise ValueError('Not a valid dataset name: {}'.format(data_name))
    print('data ready')
    return train_dataset, test_dataset
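A minimal usage sketch, assuming the project's custom datasets module and make_stats helper are importable and the ./data directories exist.

import torch

train_dataset, test_dataset = fetch_dataset('CIFAR10')

train_loader = torch.utils.data.DataLoader(train_dataset,
                                           batch_size=128,
                                           shuffle=True,
                                           num_workers=4,
                                           pin_memory=True)
test_loader = torch.utils.data.DataLoader(test_dataset,
                                          batch_size=128,
                                          shuffle=False,
                                          num_workers=4,
                                          pin_memory=True)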