def train_transform(self, rgb, depth):
    """Randomly augment an (rgb, depth) training pair.

    Applies a shared scale / rotate / crop / flip pipeline to both images,
    then color-jitters and normalizes the RGB image to [0, 1].
    """
    # Draw the three random augmentation parameters up front.
    scale = np.random.uniform(1.0, 1.5)       # random scaling factor
    rotation = np.random.uniform(-5.0, 5.0)   # random rotation (degrees)
    flip = np.random.uniform(0.0, 1.0) < 0.5  # coin flip for mirroring
    # Enlarging the image by `scale` makes scene content appear closer,
    # so the depth values are divided by the same factor.
    depth_scaled = depth / scale
    # Downsample first so the (slow) rotation runs on a smaller image.
    pipeline = transforms.Compose([
        transforms.Resize(250.0 / iheight),
        transforms.Rotate(rotation),
        transforms.Resize(scale),
        transforms.CenterCrop((228, 304)),
        transforms.HorizontalFlip(flip),
        transforms.Resize(self.output_size),
    ])
    rgb_np = self.color_jitter(pipeline(rgb))  # random color jittering
    rgb_np = np.asfarray(rgb_np, dtype="float") / 255
    depth_np = pipeline(depth_scaled)
    return rgb_np, depth_np
def val_transform(self, rgb, depth):
    """Deterministically resize and crop an (rgb, depth) pair for evaluation.

    Same geometry as the training pipeline but with no random augmentation;
    the RGB image is additionally scaled to [0, 1].
    """
    pipeline = transforms.Compose([
        transforms.Resize(250.0 / iheight),  # match training-time downsample
        transforms.CenterCrop((228, 304)),
        transforms.Resize(self.output_size),
    ])
    rgb_np = np.asfarray(pipeline(rgb), dtype="float") / 255
    depth_np = pipeline(depth)
    return rgb_np, depth_np
def get(cls, args):
    """Construct (train, val, val_video) dataset instances from parsed args.

    The validation and validation-video datasets share one deterministic
    resize/center-crop pipeline; training uses a random resized crop.
    """
    norm = arraytransforms.Normalize(mean=[0.502], std=[1.0])
    train_pipeline = transforms.Compose([
        arraytransforms.RandomResizedCrop(224),
        arraytransforms.ToTensor(),
        norm,
        transforms.Lambda(torch.cat),  # concatenate the resulting tensors
    ])
    eval_pipeline = transforms.Compose([
        arraytransforms.Resize(256),
        arraytransforms.CenterCrop(224),
        arraytransforms.ToTensor(),
        norm,
        transforms.Lambda(torch.cat),
    ])
    train_dataset = cls(args.data, 'train', args.train_file, args.cache,
                        transform=train_pipeline)
    val_dataset = cls(args.data, 'val', args.val_file, args.cache,
                      transform=eval_pipeline)
    valvideo_dataset = cls(args.data, 'val_video', args.val_file, args.cache,
                           transform=eval_pipeline)
    return train_dataset, val_dataset, valvideo_dataset
def main():
    """Entry point: build the model and data loaders, optionally resume from
    a checkpoint, then run the train/validate loop, checkpointing each epoch.
    """
    global args, best_prec1
    args = parser.parse_args()

    # create model
    if args.arch == 'alexnet':
        model = model_list.alexnet(pretrained=args.pretrained)
        input_size = 227
    else:
        raise Exception('Model not supported yet')

    # DataParallel over the feature extractor only for alexnet/vgg; other
    # architectures wrap the whole model.
    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = torch.nn.DataParallel(model.features)
        model.cuda()
    else:
        model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 weight_decay=args.weight_decay)

    # Re-initialize weights: conv/linear layers get N(0, 1/n) noise, batchnorm
    # scales start at 1.  NOTE(review): this overwrites any pretrained weights
    # loaded above -- confirm that is intended.
    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear)):
            c = float(m.weight.data[0].nelement())
            m.weight.data = m.weight.data.normal_(0, 1.0 / c)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
            del checkpoint  # release checkpoint memory before training
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    if args.caffe_data:
        print('==> Using Caffe Dataset')
        cwd = os.getcwd()
        sys.path.append(cwd + '/../')
        import datasets as datasets
        import datasets.transforms as transforms
        if not os.path.exists(args.data + '/imagenet_mean.binaryproto'):
            print("==> Data directory" + args.data + "does not exits")
            print("==> Please specify the correct data path by")
            print("==> --data <DATA_PATH>")
            return
        normalize = transforms.Normalize(
            meanfile=args.data + '/imagenet_mean.binaryproto')
        train_dataset = datasets.ImageFolder(
            args.data,
            transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
                transforms.RandomSizedCrop(input_size),
            ]),
            Train=True)
        train_sampler = None
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True, sampler=train_sampler)
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(
                args.data,
                transforms.Compose([
                    transforms.ToTensor(),
                    normalize,
                    transforms.CenterCrop(input_size),
                ]),
                Train=False),
            batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True)
    else:
        print('==> Using Pytorch Dataset')
        import torchvision
        import torchvision.transforms as transforms
        import torchvision.datasets as datasets
        traindir = os.path.join(args.data, 'train')
        valdir = os.path.join(args.data, 'val')
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])
        torchvision.set_image_backend('accimage')
        train_dataset = datasets.ImageFolder(
            traindir,
            transforms.Compose([
                transforms.RandomResizedCrop(input_size, scale=(0.40, 1.0)),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ]))
        train_loader = torch.utils.data.DataLoader(
            train_dataset, batch_size=args.batch_size, shuffle=True,
            num_workers=args.workers, pin_memory=True)
        val_loader = torch.utils.data.DataLoader(
            datasets.ImageFolder(
                valdir,
                transforms.Compose([
                    transforms.Resize(256),
                    transforms.CenterCrop(input_size),
                    transforms.ToTensor(),
                    normalize,
                ])),
            batch_size=args.batch_size, shuffle=False,
            num_workers=args.workers, pin_memory=True)

    # BUG FIX: `print model` is Python 2 statement syntax (a SyntaxError in
    # Python 3); use the print() function like the rest of this file.
    print(model)

    # define the binarization operator
    global bin_op
    bin_op = util.BinOp(model)

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)
        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)
        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best)
def make_mot_transforms(image_set):
    """Return the transform pipeline for the given MOT split.

    'train' and 'val' share a fixed resize/crop pipeline (both include a
    random horizontal flip); 'trainall' uses multi-scale random augmentation;
    'test' only resizes.  Raises ValueError for an unknown split name.
    """
    normalize = T.Compose([
        T.ToTensor(),
        T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    scales = [480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800]
    scale = scales[1]  # 512

    if image_set in ('train', 'val'):
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.Compose([
                T.Resize(1000),
                T.SizeCrop_MOT(1000),
                T.Resize(scale),
            ]),
            normalize,
        ])

    if image_set == 'trainall':
        return T.Compose([
            T.RandomHorizontalFlip(),
            T.RandomSelect(
                T.RandomResize(scales, max_size=1333),
                T.Compose([
                    T.RandomResize([800, 1000, 1200]),
                    T.RandomSizeCrop_MOT(800, 1200),
                    T.RandomResize(scales, max_size=1333),
                ])),
            normalize,
        ])

    if image_set == 'test':
        return T.Compose([
            T.RandomResize([800], max_size=1333),
            normalize,
        ])

    raise ValueError(f'unknown {image_set}')
def fetch_dataset(data_name):
    """Build and return (train_dataset, test_dataset) for the named dataset.

    Datasets that need normalization statistics are first loaded with a plain
    ToTensor transform, `make_stats` computes per-dataset stats from the
    training split, and the final transform is then swapped in.

    NOTE(review): relies on module-level names not visible in this block --
    `normalize`, `branch`, `datasets`, `transforms`, `make_stats` -- and the
    `datasets` module appears to be project-local (torchvision has no
    `branch=` kwarg and no CUB2011/WheatImage/MOSI classes); confirm.
    """
    print('fetching data {}...'.format(data_name))
    if (data_name == 'MNIST'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        # Load once with plain ToTensor so stats can be computed first.
        train_dataset = datasets.MNIST(root=train_dir, train=True,
                                       download=True,
                                       transform=transforms.ToTensor())
        if (normalize):
            # per-dataset mean/std computed from the training split
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
            test_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        # swap in the final transform now that stats exist
        train_dataset.transform = train_transform
        test_dataset = datasets.MNIST(root=test_dir, train=False,
                                      download=True,
                                      transform=test_transform)
    elif (data_name == 'EMNIST' or data_name == 'EMNIST_byclass'
          or data_name == 'EMNIST_bymerge' or data_name == 'EMNIST_balanced'
          or data_name == 'EMNIST_letters' or data_name == 'EMNIST_digits'
          or data_name == 'EMNIST_mnist'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/test'.format(data_name.split('_')[0])
        transform = transforms.Compose([transforms.ToTensor()])
        # split defaults to 'balanced' when no '_suffix' is given
        split = 'balanced' if len(
            data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.EMNIST(root=train_dir, split=split,
                                        branch=branch, train=True,
                                        download=True, transform=transform)
        test_dataset = datasets.EMNIST(root=test_dir, split=split,
                                       branch=branch, train=False,
                                       download=True, transform=transform)
    elif (data_name == 'FashionMNIST'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        transform = transforms.Compose([transforms.ToTensor()])
        train_dataset = datasets.FashionMNIST(root=train_dir, train=True,
                                              download=True,
                                              transform=transform)
        test_dataset = datasets.FashionMNIST(root=test_dir, train=False,
                                             download=True,
                                             transform=transform)
    elif (data_name == 'CIFAR10'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR10(train_dir, train=True,
                                         transform=transforms.ToTensor(),
                                         download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            # standard CIFAR augmentation: pad-crop + horizontal flip
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR10(test_dir, train=False,
                                        transform=test_transform,
                                        download=True)
    elif (data_name == 'CIFAR100'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR100(train_dir, branch=branch,
                                          train=True,
                                          transform=transforms.ToTensor(),
                                          download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR100(test_dir, branch=branch,
                                         train=False,
                                         transform=test_transform,
                                         download=True)
    elif (data_name == 'SVHN'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.SVHN(train_dir, split='train',
                                      transform=transforms.ToTensor(),
                                      download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
            test_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.SVHN(test_dir, split='test',
                                     transform=test_transform,
                                     download=True)
    elif (data_name == 'ImageNet'):
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.ImageFolder(train_dir,
                                             transform=transforms.ToTensor())
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.ImageFolder(test_dir,
                                            transform=test_transform)
    elif (data_name == 'CUB2011'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        train_dataset = datasets.CUB2011(train_dir,
                                         transform=transforms.Compose([
                                             transforms.Resize((224, 224)),
                                             transforms.ToTensor()
                                         ]),
                                         download=True)
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CUB2011(test_dir,
                                        transform=test_transform,
                                        download=True)
    elif (data_name == 'WheatImage' or data_name == 'WheatImage_binary'
          or data_name == 'WheatImage_six'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        # label granularity defaults to 'six' when no '_suffix' is given
        label_mode = 'six' if len(
            data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.WheatImage(train_dir,
                                            label_mode=label_mode,
                                            transform=transforms.Compose([
                                                transforms.Resize((224, 288)),
                                                transforms.ToTensor()
                                            ]))
        if (normalize):
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 288)), transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.WheatImage(test_dir,
                                           label_mode=label_mode,
                                           transform=test_transform)
    elif (data_name == 'CocoDetection'):
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/instances_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/instances_val2017.json'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset = datasets.CocoDetection(train_dir, train_ann,
                                               transform=transform)
        test_dataset = datasets.CocoDetection(test_dir, test_ann,
                                              transform=transform)
    elif (data_name == 'CocoCaptions'):
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/captions_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/captions_val2017.json'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset = datasets.CocoCaptions(train_dir, train_ann,
                                              transform=transform)
        test_dataset = datasets.CocoCaptions(test_dir, test_ann,
                                             transform=transform)
    elif (data_name == 'VOCDetection'):
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset = datasets.VOCDetection(train_dir, 'trainval',
                                              transform=transform)
        test_dataset = datasets.VOCDetection(test_dir, 'test',
                                             transform=transform)
    elif (data_name == 'VOCSegmentation'):
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset = datasets.VOCSegmentation(train_dir, 'trainval',
                                                 transform=transform)
        test_dataset = datasets.VOCSegmentation(test_dir, 'test',
                                                transform=transform)
    elif (data_name == 'MOSI' or data_name == 'MOSI_binary'
          or data_name == 'MOSI_five' or data_name == 'MOSI_seven'
          or data_name == 'MOSI_regression'):
        train_dir = './data/{}'.format(data_name.split('_')[0])
        test_dir = './data/{}'.format(data_name.split('_')[0])
        # label granularity defaults to 'five' when no '_suffix' is given
        label_mode = 'five' if len(
            data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.MOSI(train_dir, split='trainval',
                                      label_mode=label_mode, download=True)
        # MOSI is always normalized (no `normalize` flag check here)
        stats = make_stats(train_dataset, batch_size=1)
        train_transform = transforms.Compose([transforms.Normalize(stats)])
        test_transform = transforms.Compose([transforms.Normalize(stats)])
        train_dataset.transform = train_transform
        test_dataset = datasets.MOSI(test_dir, split='test',
                                     label_mode=label_mode, download=True,
                                     transform=test_transform)
    elif (data_name == 'Kodak'):
        # evaluation-only corpus: train and test point at the same folder
        train_dataset = None
        transform = transforms.Compose([transforms.ToTensor()])
        test_dir = './data/{}'.format(data_name)
        # NOTE: the None above is immediately overwritten here
        train_dataset = datasets.ImageFolder(test_dir, transform)
        test_dataset = datasets.ImageFolder(test_dir, transform)
    elif (data_name == 'UCID'):
        # evaluation-only corpus: train and test point at the same folder
        train_dataset = None
        transform = transforms.Compose([transforms.ToTensor()])
        test_dir = './data/{}'.format(data_name)
        # NOTE: the None above is immediately overwritten here
        train_dataset = datasets.ImageFolder(test_dir, transform)
        test_dataset = datasets.ImageFolder(test_dir, transform)
    else:
        raise ValueError('Not valid dataset name')
    print('data ready')
    return train_dataset, test_dataset