def get(args):
    """ Entry point. Call this function to get all Charades dataloaders """
    normalize = arraytransforms.Normalize(mean=[0.502], std=[1.0])
    train_file = args.train_file
    val_file = args.val_file
    train_dataset = Charadesflow(
        args.data, 'train', train_file, args.cache,
        transform=transforms.Compose([
            arraytransforms.RandomResizedCrop(224),
            arraytransforms.ToTensor(),
            normalize,
            transforms.Lambda(lambda x: torch.cat(x)),
        ]))
    val_transforms = transforms.Compose([
        arraytransforms.Resize(256),
        arraytransforms.CenterCrop(224),
        arraytransforms.ToTensor(),
        normalize,
        transforms.Lambda(lambda x: torch.cat(x)),
    ])
    val_dataset = Charadesflow(
        args.data, 'val', val_file, args.cache, transform=val_transforms)
    valvideo_dataset = Charadesflow(
        args.data, 'val_video', val_file, args.cache, transform=val_transforms)
    return train_dataset, val_dataset, valvideo_dataset
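# A minimal usage sketch for get(), assuming an argparse-style namespace with
# the attributes the function reads (data, train_file, val_file, cache). The
# paths and the DataLoader wrapping are illustrative assumptions, not part of
# this file.
import argparse
import torch.utils.data

_args = argparse.Namespace(
    data='/path/to/Charades_v1_flow',       # hypothetical paths
    train_file='Charades_v1_train.csv',
    val_file='Charades_v1_test.csv',
    cache='/tmp/charades_cache')
train_dataset, val_dataset, valvideo_dataset = get(_args)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=64, shuffle=True, num_workers=4, pin_memory=True)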
def get_emotion(current_face, gray):
    """
    inputs:
        current_face: (xmin, ymin, w, h)
        gray: grayscale frame
    outputs:
        emotion: from -1 to 1, 1 being most positive, -1 being most negative
    """
    cut_size = 44
    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
    ])

    # crop face from grayscale frame
    xmin = current_face[0]
    xmax = current_face[0] + current_face[2]
    ymin = current_face[1]
    ymax = current_face[1] + current_face[3]
    face = gray[ymin:ymax, xmin:xmax]

    # resize to the 48x48 input the model expects and replicate the grayscale
    # channel three times
    face = (resize(face, (48, 48), mode='symmetric') * 255).astype('uint8')
    img = face[:, :, np.newaxis]
    img = np.concatenate((img, img, img), axis=2)
    img = Image.fromarray(img)
    inputs = transform_test(img)

    class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

    # set device, load model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = vgg.VGG('VGG19')
    checkpoint = torch.load('PrivateTest_model.t7')
    net.load_state_dict(checkpoint['net'])
    net.to(device)
    net.eval()

    ncrops, c, h, w = np.shape(inputs)
    inputs = inputs.view(-1, c, h, w)
    inputs = inputs.to(device)
    with torch.no_grad():  # Variable is deprecated; no_grad alone is sufficient
        outputs = net(inputs)
    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

    # map the first six class probabilities (Neutral excluded) to a single
    # valence score in [-1, 1]
    weights = np.array([-0.4, -0.1, -0.1, 0.8, -0.4, 0.2])
    score = F.softmax(outputs_avg, dim=0)
    emotion_score = np.sum(score.cpu().numpy()[:6] ** 0.5 * weights)
    return emotion_score
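# A hedged usage sketch for get_emotion(). The Haar-cascade detector and the
# capture loop are assumptions for illustration; only the (x, y, w, h) box
# format matches what get_emotion() expects.
import cv2

cap = cv2.VideoCapture(0)
detector = cv2.CascadeClassifier(
    cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
ret, frame = cap.read()
if ret:
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = detector.detectMultiScale(gray_frame, scaleFactor=1.3, minNeighbors=5)
    for (x, y, w, h) in faces:
        print('valence:', get_emotion((x, y, w, h), gray_frame))
cap.release()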
def img_10crop(self, img):
    """
    Data augmentation: crop the image at the four corners and the center,
    then mirror each crop, expanding the data tenfold (TenCrop).
    """
    cut_size = [44, 44]
    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
    ])
    inputs = transform_test(img)
    return inputs
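# A quick shape check for img_10crop (hypothetical standalone call: self is
# unused in the body, so the unbound function is invoked directly on a dummy
# 48x48 RGB image).
from PIL import Image
import numpy as np

dummy = Image.fromarray(np.zeros((48, 48, 3), dtype=np.uint8))
crops = img_10crop(None, dummy)
print(crops.shape)  # expected: torch.Size([10, 3, 44, 44])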
def getEmotionScore(imageDirectory, resizeImg):
    cut_size = 44
    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
    ])

    raw_img = io.imread(imageDirectory)
    if resizeImg:
        raw_img = cropND(raw_img, (400, 400))
        io.imsave("screenshot.jpg", raw_img)
    gray = rgb2gray(raw_img)
    # rgb2gray/resize return floats in [0, 1]; rescale before casting to uint8
    gray = (resize(gray, (48, 48), mode='symmetric') * 255).astype(np.uint8)

    img = gray[:, :, np.newaxis]
    img = np.concatenate((img, img, img), axis=2)
    img = Image.fromarray(img)
    inputs = transform_test(img)

    net = VGG('VGG19')
    checkpoint = torch.load(
        os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))
    net.load_state_dict(checkpoint['net'])
    net.cuda()
    net.eval()

    ncrops, c, h, w = np.shape(inputs)
    inputs = inputs.view(-1, c, h, w)
    inputs = inputs.cuda()
    with torch.no_grad():  # replaces the deprecated volatile=True Variable
        outputs = net(inputs)
    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

    score = F.softmax(outputs_avg, dim=0)
    _, predicted = torch.max(outputs_avg.data, 0)

    scoreDict = {}
    for i in range(len(class_names)):  # class_names is a module-level global here
        scoreDict[class_names[i]] = float(score.data.cpu().numpy()[i])
    scoreDict["expression"] = str(class_names[int(predicted.cpu().numpy())])
    return scoreDict
def img2mood(raw_img):
    cut_size = 44
    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
    ])

    # raw_img = io.imread('images/1.jpg')
    gray = rgb2gray(raw_img)
    # rgb2gray/resize return floats in [0, 1]; rescale before casting to uint8
    gray = (resize(gray, (48, 48), mode='symmetric') * 255).astype(np.uint8)

    img = gray[:, :, np.newaxis]
    img = np.concatenate((img, img, img), axis=2)
    img = Image.fromarray(img)
    inputs = transform_test(img)

    class_names = [
        'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'
    ]

    net = VGG('VGG19')
    checkpoint = torch.load(os.path.join('FER2013_VGG19',
                                         'PrivateTest_model.t7'),
                            map_location='cpu')
    net.load_state_dict(checkpoint['net'])
    # net.cuda()
    net.eval()

    ncrops, c, h, w = np.shape(inputs)
    inputs = inputs.view(-1, c, h, w)
    # inputs = inputs.cuda()
    with torch.no_grad():  # replaces the deprecated volatile=True Variable
        outputs = net(inputs)
    outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

    score = F.softmax(outputs_avg, dim=0)
    _, predicted = torch.max(outputs_avg.data, 0)

    emojis_img = io.imread('images/emojis/%s.png' %
                           str(class_names[int(predicted.cpu().numpy())]))
    print("The Expression is %s" %
          str(class_names[int(predicted.cpu().numpy())]))
    return int(predicted.cpu().numpy()), emojis_img
def predict_emotion(file_name):
    raw_img = io.imread(file_name)
    cut_size = 44
    transform_test = transforms.Compose([
        transforms.TenCrop(cut_size),
        transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])),
    ])

    gray = rgb2gray(raw_img)
    # rgb2gray/resize return floats in [0, 1]; rescale before casting to uint8
    gray = (resize(gray, (48, 48), mode='symmetric') * 255).astype(np.uint8)

    img = gray[:, :, np.newaxis]
    img = np.concatenate((img, img, img), axis=2)  # (48, 48, 3)
    img = Image.fromarray(img)  # convert to a PIL image
    inputs = transform_test(img)  # apply the ten-crop transform

    class_names = [
        'Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral'
    ]

    net = VGG('VGG19')  # load the network model
    checkpoint = torch.load(os.path.join(
        'F:/Study/First_Grade/Winter_Vacation/Real-time-face-recognition-master/Facial-Expression-Recognition.Pytorch-master/FER2013_VGG19',
        'PrivateTest_model.t7'),
                            map_location='cpu')
    net.load_state_dict(checkpoint['net'])
    # net.cuda()
    net.eval()

    ncrops, c, h, w = np.shape(inputs)
    inputs = inputs.view(-1, c, h, w)
    # inputs = inputs.cuda()
    with torch.no_grad():  # replaces the deprecated volatile=True Variable
        outputs = net(inputs)
    # averaging predictions over the ten augmented crops improves test accuracy
    outputs_avg = outputs.view(ncrops, -1).mean(0)

    score = F.softmax(outputs_avg, dim=0)  # tensor of class probabilities
    _, predicted = torch.max(score.data, 0)
    return score, predicted
path = os.path.join(opt.dataset + '_' + opt.model, str(opt.fold))

# Data
print('==> Preparing data..')
# Data augmentation: random crops enlarge the effective dataset and help
# reduce overfitting.
transform_train = transforms.Compose([
    transforms.RandomCrop(cut_size),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.TenCrop(cut_size),
    transforms.Lambda(lambda crops: torch.stack(
        [transforms.ToTensor()(crop) for crop in crops])),
])

trainset = CK(split='Training', fold=opt.fold, transform=transform_train)
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=opt.bs,
                                          shuffle=True,
                                          num_workers=0)
testset = CK(split='Testing', fold=opt.fold, transform=transform_test)
testloader = torch.utils.data.DataLoader(testset,
                                         batch_size=5,
                                         shuffle=False,
                                         num_workers=0)

# Model
if opt.model == 'VGG19':
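# Standalone sketch (separate from the truncated snippet above): because
# transform_test stacks ten crops per image, each test batch arrives as
# (bs, ncrops, c, h, w). A hedged sketch of the evaluation step, following the
# flatten-then-average pattern used throughout this file; `net` is assumed to
# be the model the snippet builds.
for inputs, targets in testloader:
    bs, ncrops, c, h, w = inputs.size()
    with torch.no_grad():
        outputs = net(inputs.view(-1, c, h, w))        # (bs*ncrops, classes)
    outputs_avg = outputs.view(bs, ncrops, -1).mean(1)  # avg over crops
    _, predicted = outputs_avg.max(1)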
def main():
    global best_prec1
    best_prec1 = 0
    global val_acc
    val_acc = []
    global class_num

    class_num = 10 if args.dataset == 'cifar10' else 100

    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if args.augment:
        if args.autoaugment:
            print('Autoaugment')
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                                  mode='reflect').squeeze()),
                transforms.ToPILImage(),
                transforms.RandomCrop(32),
                transforms.RandomHorizontalFlip(),
                CIFAR10Policy(),
                transforms.ToTensor(),
                Cutout(n_holes=args.n_holes, length=args.length),
                normalize,
            ])
        elif args.cutout:
            print('Cutout')
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                                  mode='reflect').squeeze()),
                transforms.ToPILImage(),
                transforms.RandomCrop(32),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                Cutout(n_holes=args.n_holes, length=args.length),
                normalize,
            ])
        else:
            print('Standard Augmentation!')
            transform_train = transforms.Compose([
                transforms.ToTensor(),
                transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                                  mode='reflect').squeeze()),
                transforms.ToPILImage(),
                transforms.RandomCrop(32),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])

    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    assert args.dataset in ('cifar10', 'cifar100')
    train_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=True,
                                                download=True,
                                                transform=transform_train),
        batch_size=training_configurations[args.model]['batch_size'],
        shuffle=True,
        **kwargs)
    val_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=False,
                                                transform=transform_test),
        batch_size=training_configurations[args.model]['batch_size'],
        shuffle=True,
        **kwargs)

    # create model
    if args.model == 'resnet':
        model = eval('networks.resnet.resnet' + str(args.layers) +
                     '_cifar')(dropout_rate=args.droprate)
    elif args.model == 'se_resnet':
        model = eval('networks.se_resnet.resnet' + str(args.layers) +
                     '_cifar')(dropout_rate=args.droprate)
    elif args.model == 'wideresnet':
        model = networks.wideresnet.WideResNet(args.layers,
                                               class_num,
                                               args.widen_factor,
                                               dropRate=args.droprate)
    elif args.model == 'se_wideresnet':
        model = networks.se_wideresnet.WideResNet(args.layers,
                                                  class_num,
                                                  args.widen_factor,
                                                  dropRate=args.droprate)
    elif args.model == 'densenet_bc':
        model = networks.densenet_bc.DenseNet(
            growth_rate=args.growth_rate,
            block_config=(int((args.layers - 4) / 6), ) * 3,
            compression=args.compression_rate,
            num_init_features=24,
            bn_size=args.bn_size,
            drop_rate=args.droprate,
            small_inputs=True,
            efficient=False)
    elif args.model == 'shake_pyramidnet':
        model = networks.shake_pyramidnet.PyramidNet(dataset=args.dataset,
                                                     depth=args.layers,
                                                     alpha=args.alpha,
                                                     num_classes=class_num,
                                                     bottleneck=True)
    elif args.model == 'resnext':
        if args.cardinality == 8:
            model = networks.resnext.resnext29_8_64(class_num)
        if args.cardinality == 16:
            model = networks.resnext.resnext29_16_64(class_num)
    elif args.model == 'shake_shake':
        if args.widen_factor == 112:
            model = networks.shake_shake.shake_resnet26_2x112d(class_num)
        if args.widen_factor == 32:
            model = networks.shake_shake.shake_resnet26_2x32d(class_num)
        if args.widen_factor == 96:
            # NOTE: this branch reuses the 2x32d constructor; it looks like a
            # copy-paste slip in the original and is kept as-is.
            model = networks.shake_shake.shake_resnet26_2x32d(class_num)
    elif args.model == 'shake_shake_x':
        model = networks.shake_shake.shake_resnext29_2x4x64d(class_num)

    if not os.path.isdir(check_point):
        mkdir_p(check_point)

    fc = Full_layer(int(model.feature_num), class_num)

    print('Number of final features: {}'.format(int(model.feature_num)))
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()]) +
        sum([p.data.nelement() for p in fc.parameters()])))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    isda_criterion = ISDALoss(int(model.feature_num), class_num).cuda()
    ce_criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(
        [{'params': model.parameters()},
         {'params': fc.parameters()}],
        lr=training_configurations[args.model]['initial_learning_rate'],
        momentum=training_configurations[args.model]['momentum'],
        nesterov=training_configurations[args.model]['nesterov'],
        weight_decay=training_configurations[args.model]['weight_decay'])

    model = torch.nn.DataParallel(model).cuda()
    fc = nn.DataParallel(fc).cuda()

    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(args.resume), \
            'Error: no checkpoint directory found!'
        args.checkpoint = os.path.dirname(args.resume)
        checkpoint = torch.load(args.resume)
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        fc.load_state_dict(checkpoint['fc'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        isda_criterion = checkpoint['isda_criterion']
        val_acc = checkpoint['val_acc']
        best_prec1 = checkpoint['best_acc']
        np.savetxt(accuracy_file, np.array(val_acc))
    else:
        start_epoch = 0

    for epoch in range(start_epoch,
                       training_configurations[args.model]['epochs']):

        adjust_learning_rate(optimizer, epoch + 1)

        # train for one epoch
        train(train_loader, model, fc, isda_criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, fc, ce_criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'fc': fc.state_dict(),
                'best_acc': best_prec1,
                'optimizer': optimizer.state_dict(),
                'isda_criterion': isda_criterion,
                'val_acc': val_acc,
            },
            is_best,
            checkpoint=check_point)
        print('Best accuracy: ', best_prec1)
        np.savetxt(accuracy_file, np.array(val_acc))

    print('Best accuracy: ', best_prec1)
    print('Average accuracy', sum(val_acc[len(val_acc) - 10:]) / 10)
    # val_acc.append(sum(val_acc[len(val_acc) - 10:]) / 10)
    # np.savetxt(val_acc, np.array(val_acc))
    np.savetxt(accuracy_file, np.array(val_acc))
def computeResult(data):
    try:
        cut_size = 44
        transform_test = transforms.Compose([
            transforms.TenCrop(cut_size),
            transforms.Lambda(lambda crops: torch.stack(
                [transforms.ToTensor()(crop) for crop in crops])),
        ])

        # use the image data passed in as an array
        raw_img = np.array(data, dtype=np.uint8)
        gray = rgb2gray(raw_img)
        # rgb2gray/resize return floats in [0, 1]; rescale before casting to uint8
        gray = (resize(gray, (48, 48), mode='symmetric') * 255).astype(np.uint8)

        img = gray[:, :, np.newaxis]
        img = np.concatenate((img, img, img), axis=2)
        img = Image.fromarray(img)
        inputs = transform_test(img)

        class_names = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

        net = VGG('VGG19')
        checkpoint = torch.load(os.path.join('FER2013_VGG19', 'PrivateTest_model.t7'))
        net.load_state_dict(checkpoint['net'])
        net.cuda()
        net.eval()

        ncrops, c, h, w = np.shape(inputs)
        inputs = inputs.view(-1, c, h, w)
        inputs = inputs.cuda()
        with torch.no_grad():  # replaces the deprecated volatile=True Variable
            outputs = net(inputs)
        outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops

        score = F.softmax(outputs_avg, dim=0)
        _, predicted = torch.max(outputs_avg.data, 0)

        # plot the input image, the per-class scores, and the matching emoji
        plt.rcParams['figure.figsize'] = (13.5, 5.5)
        axes = plt.subplot(1, 3, 1)
        plt.imshow(raw_img)
        plt.xlabel('Input Image', fontsize=16)
        axes.set_xticks([])
        axes.set_yticks([])
        plt.tight_layout()

        plt.subplots_adjust(left=0.05, bottom=0.2, right=0.95, top=0.9,
                            hspace=0.02, wspace=0.3)

        plt.subplot(1, 3, 2)
        ind = 0.1 + 0.6 * np.arange(len(class_names))  # the x locations for the groups
        width = 0.4  # the width of the bars
        color_list = ['red', 'orangered', 'darkorange', 'limegreen',
                      'darkgreen', 'royalblue', 'navy']
        for i in range(len(class_names)):
            plt.bar(ind[i], score.data.cpu().numpy()[i], width, color=color_list[i])
        plt.title("Classification results", fontsize=20)
        plt.xlabel("Expression Category", fontsize=16)
        plt.ylabel("Classification Score", fontsize=16)
        plt.xticks(ind, class_names, rotation=45, fontsize=14)

        axes = plt.subplot(1, 3, 3)
        emojis_img = io.imread('./images/emojis/%s.png' %
                               str(class_names[int(predicted.cpu().numpy())]))
        plt.imshow(emojis_img)
        plt.xlabel('Emoji Expression', fontsize=16)
        axes.set_xticks([])
        axes.set_yticks([])
        plt.tight_layout()

        # show emojis
        # plt.show()
        plt.savefig(os.path.join('./images/results/', 'results.jpg'))
        plt.close()

        print("Result: %s" % str(class_names[int(predicted.cpu().numpy())]))
    except Exception:
        print('Cannot find image')
def show_anim(robot, cnt):
    # nonlocal model
    # read the image sequence for classification
    img_path = config.img_path
    img_names = [
        os.path.join(img_path, 'face_round{}_cnt{}.jpg'.format(cnt, i))
        for i in range(10)
    ]
    face_seq = [io.imread(img) for img in img_names]

    anim_prob = np.zeros(len(config.anim_names))
    for face in face_seq:
        # use the loaded model to predict probabilities:
        # input one image and get a one-dim array of size 7
        cut_size = 44
        transform_test = transforms.Compose([
            transforms.TenCrop(cut_size),
            transforms.Lambda(lambda crops: torch.stack(
                [transforms.ToTensor()(crop) for crop in crops])),
        ])
        raw_img = face
        gray = rgb2gray(raw_img)
        # rgb2gray/resize return floats in [0, 1]; rescale before casting to uint8
        gray = (resize(gray, (48, 48), mode='symmetric') * 255).astype(np.uint8)
        img = gray[:, :, np.newaxis]
        img = np.concatenate((img, img, img), axis=2)
        img = Image.fromarray(img)
        inputs = transform_test(img)
        ncrops, c, h, w = np.shape(inputs)
        inputs = inputs.view(-1, c, h, w)
        # inputs = inputs.cuda()
        with torch.no_grad():  # replaces the deprecated volatile=True Variable
            outputs = net(inputs)
        outputs_avg = outputs.view(ncrops, -1).mean(0)  # avg over crops
        score = F.softmax(outputs_avg, dim=0)
        _, predicted = torch.max(outputs_avg.data, 0)
        prob = score.data.cpu().numpy()
        anim_prob += prob
        # e.g. anim_prob += model(face)

    anim_prob = anim_prob / sum(anim_prob)
    anim_name = np.random.choice(config.anim_names, 1, p=anim_prob)[0]
    print('Playing Animation: ', anim_name)
    print('The Expression is: ',
          class_names[anim_prob.tolist().index(max(anim_prob))])
    robot.anim.play_animation(anim_name)
def main():
    global args, best_prec1
    args = parser.parse_args()
    # torch.cuda.set_device(args.gpu)
    if args.tensorboard:
        print("Using TensorBoard")
        configure("exp/%s" % (args.name))

    # Data loading code
    if args.augment:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            # pad without the deprecated Variable wrapper
            transforms.Lambda(lambda x: F.pad(x.unsqueeze(0), (4, 4, 4, 4),
                                              mode='replicate').squeeze()),
            transforms.ToPILImage(),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
        ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    assert args.dataset in ('cifar10', 'cifar100')
    train_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=True,
                                                download=True,
                                                transform=transform_train),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)
    val_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=False,
                                                transform=transform_test),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)

    # create model
    model = WideResNetMulti(args.layers,
                            10 if args.dataset == 'cifar10' else 100,
                            args.num_rotate_classes,
                            args.widen_factor,
                            dropRate=args.droprate)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch + 1)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
    print('Best accuracy: ', best_prec1)
def get_loader(img_root, img_size, batch_size, mode='train', num_thread=1):
    shuffle = False
    mean_bgr = torch.Tensor(3, 256, 256)
    mean_bgr[0, :, :] = 104.008  # B
    mean_bgr[1, :, :] = 116.669  # G
    mean_bgr[2, :, :] = 122.675  # R
    depth_mean_bgr = torch.Tensor(1, 256, 256)
    depth_mean_bgr[0, :, :] = 115.8695

    if mode == 'train':
        transform = trans.Compose([
            # trans.ToTensor_BGR: image -> [0, 255]
            trans.ToTensor_BGR(),
            trans.Lambda(lambda x: x - mean_bgr)
        ])
        depth_transform = trans.Compose([
            # trans.ToTensor: image -> [0, 255]
            trans.ToTensor(),
            trans.Lambda(lambda x: x - depth_mean_bgr)
        ])
        t_transform = trans.Compose([
            # transforms.ToTensor: label -> [0, 1]
            transforms.ToTensor(),
        ])
        label_32_transform = trans.Compose([
            trans.Scale((32, 32), interpolation=Image.NEAREST),
            transforms.ToTensor(),
        ])
        label_64_transform = trans.Compose([
            trans.Scale((64, 64), interpolation=Image.NEAREST),
            transforms.ToTensor(),
        ])
        label_128_transform = trans.Compose([
            trans.Scale((128, 128), interpolation=Image.NEAREST),
            transforms.ToTensor(),
        ])
        shuffle = True
    else:
        transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            trans.ToTensor_BGR(),
            trans.Lambda(lambda x: x - mean_bgr)
        ])
        depth_transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            trans.ToTensor(),
            trans.Lambda(lambda x: x - depth_mean_bgr)
        ])
        t_transform = trans.Compose([
            trans.Scale((img_size, img_size), interpolation=Image.NEAREST),
            transforms.ToTensor(),
        ])

    if mode == 'train':
        dataset = ImageData(img_root, transform, depth_transform, t_transform,
                            label_32_transform, label_64_transform,
                            label_128_transform, mode)
    else:
        dataset = ImageData(img_root, transform, depth_transform, t_transform,
                            label_32_transform=None,
                            label_64_transform=None,
                            label_128_transform=None,
                            mode=mode)
    data_loader = data.DataLoader(dataset=dataset,
                                  batch_size=batch_size,
                                  shuffle=shuffle,
                                  num_workers=num_thread)
    return data_loader
def get_loader(img_root, label_root, img_size, batch_size, mode='train',
               num_thread=1):
    shuffle = False
    mean = torch.Tensor(3, 256, 256)
    mean[0, :, :] = 125.5325
    mean[1, :, :] = 118.1743
    mean[2, :, :] = 101.3507
    # mean = torch.Tensor([123.68, 116.779, 103.939]).view(3, 1, 1) / 255

    if mode == 'train':
        transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            # trans.ToTensor: image -> [0, 255]
            trans.ToTensor(),
            trans.Lambda(lambda x: x - mean)
        ])
        t_transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            # transforms.ToTensor: label -> [0, 1]
            transforms.ToTensor(),
            # transforms.Lambda(lambda x: torch.round(x))  # TODO: may be unnecessary
        ])
        label_32_transform = trans.Compose([
            trans.Scale((32, 32)),
            transforms.ToTensor(),
        ])
        label_64_transform = trans.Compose([
            trans.Scale((64, 64)),
            transforms.ToTensor(),
        ])
        label_128_transform = trans.Compose([
            trans.Scale((128, 128)),
            transforms.ToTensor(),
        ])
        shuffle = True
    else:
        # transform for input images
        transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            trans.ToTensor(),
            trans.Lambda(lambda x: x - mean)
        ])
        # transform for ground truth
        t_transform = trans.Compose([
            trans.Scale((img_size, img_size)),
            transforms.ToTensor(),
            # transforms.Lambda(lambda x: torch.round(x))  # TODO: may be unnecessary
        ])

    if mode == 'train':
        dataset = ImageData(img_root, label_root, transform, t_transform,
                            label_32_transform, label_64_transform,
                            label_128_transform)
    else:
        dataset = ImageData(img_root, label_root, transform, t_transform,
                            label_32_transform=None,
                            label_64_transform=None,
                            label_128_transform=None)
    # print(dataset.image_path)
    data_loader = data.DataLoader(dataset=dataset,
                                  batch_size=batch_size,
                                  shuffle=shuffle,
                                  num_workers=num_thread)
    return data_loader
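# A hedged usage sketch for get_loader(); the paths and sizes are illustrative
# placeholders, and the per-batch structure depends on what ImageData returns,
# so the loop below only exercises the transform pipeline.
train_loader = get_loader('/path/to/images', '/path/to/labels',
                          img_size=256, batch_size=8,
                          mode='train', num_thread=4)
for batch in train_loader:
    break  # one batch is enough to verify the transforms run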
# laplacia_weights = []
# laplacia_weights = [1e3 / n ** 2 for n in [512]]
# laplacia_weights = [3 / n for n in [1, 2, 3]]

# VGG definition that conveniently lets you grab the outputs from any layer
# %%
# gram matrix and loss
# %%
# Pre-processing: the VGG weights used here expect BGR images in [0, 255]
# with the ImageNet mean subtracted.
prep = transforms.Compose([
    transforms.Resize(args.img_size),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]),  # turn to BGR
    transforms.Normalize(
        mean=[0.40760392, 0.45795686, 0.48501961],  # subtract ImageNet mean
        std=[1, 1, 1]),
    transforms.Lambda(lambda x: x.mul_(255)),
])
prep_hr = transforms.Compose([
    transforms.Resize(args.img_size_hr),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]),  # turn to BGR
    transforms.Normalize(
        mean=[0.40760392, 0.45795686, 0.48501961],  # subtract ImageNet mean
        std=[1, 1, 1]),
    transforms.Lambda(lambda x: x.mul_(255)),
])
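# A hedged sketch of the inverse ("post-processing") transform, derived from
# prep above: scale back to [0, 1], add the mean back, reorder BGR -> RGB, and
# clamp before converting to a PIL image. The name postp is an assumption,
# mirroring the Gatys-style pipeline this prep comes from.
postp = transforms.Compose([
    transforms.Lambda(lambda x: x.mul_(1. / 255)),
    transforms.Normalize(
        mean=[-0.40760392, -0.45795686, -0.48501961],  # add ImageNet mean back
        std=[1, 1, 1]),
    transforms.Lambda(lambda x: x[torch.LongTensor([2, 1, 0])]),  # BGR -> RGB
    transforms.Lambda(lambda x: x.clamp_(0, 1)),
    transforms.ToPILImage(),
])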