import torch
from torch.utils import data
from torchvision import transforms
from nvidia.dali.plugin.pytorch import DALIGenericIterator

# LFW, CFP_FP, AgeDB30, CASIAWebFace, MS1M, Cutout and MXNetReaderPipeline are
# repo-local classes; import them from this project's dataset/pipeline modules.


def load_data_with_MXNet_dali(batch_size, args, dataset='Faces_emore'):
    """Build the training loader (DALI pipeline for Faces_emore) plus the
    three verification loaders (LFW, CFP-FP, AgeDB-30)."""
    # Shared eval transform: [0, 255] -> [0.0, 1.0] -> [-1.0, 1.0].
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])

    # Verification sets.
    root = '/data/face_dataset/LFW/lfw_align_112'
    file_list = '/data/face_dataset/LFW/pairs.txt'
    dataset_LFW = LFW(root, file_list, transform=transform)

    root = '/data/face_dataset/CFP-FP/CFP_FP_aligned_112'
    file_list = '/data/face_dataset/CFP-FP/cfp_fp_pair.txt'
    dataset_CFP_FP = CFP_FP(root, file_list, transform=transform)

    root = '/data/face_dataset/AgeDB-30/agedb30_align_112'
    file_list = '/data/face_dataset/AgeDB-30/agedb_30_pair.txt'
    dataset_AgeDB30 = AgeDB30(root, file_list, transform=transform)

    # Training set.
    if dataset == 'CASIA':
        root = 'data_set/CASIA_Webface_Image'
        file_list = 'data_set/CASIA_Webface_Image/webface_align_112.txt'
        dataset_train = CASIAWebFace(root, file_list, transform=transform)
        # CASIA has no DALI pipeline here, so fall back to a plain DataLoader
        # to keep 'train_dataset' bound in every branch.
        train_loader = data.DataLoader(dataset_train, batch_size=batch_size,
                                       pin_memory=True, shuffle=True)
    elif dataset == 'Faces_emore':
        path = '/data/face_dataset/'
        # One pipeline per process; num_gpus is the shard count (2 here).
        pipes = MXNetReaderPipeline(path, batch_size=batch_size, num_threads=4,
                                    device_id=args.local_rank, num_gpus=2)
        pipes.build()
        train_loader = DALIGenericIterator(pipes, ['data', 'label'],
                                           pipes.epoch_size('Reader'))
        # Keep an MS1M dataset object alongside the DALI iterator so that
        # len()-based size reporting below still works.
        root = '/data/face_dataset/imgs/'
        file_list = '/data/face_dataset/imgs/faces_emore_align_112.txt'
        dataset_train = MS1M(root, file_list, transform=transform)
    else:
        raise ValueError('unknown training dataset: {}'.format(dataset))

    dataloaders = {
        'train_dataset': train_loader,
        'LFW': data.DataLoader(dataset_LFW, batch_size=batch_size,
                               pin_memory=True, shuffle=False),
        'CFP_FP': data.DataLoader(dataset_CFP_FP, batch_size=batch_size,
                                  pin_memory=True, shuffle=False),
        'AgeDB30': data.DataLoader(dataset_AgeDB30, batch_size=batch_size,
                                   pin_memory=True, shuffle=False),
    }
    datasets = {'train_dataset': dataset_train, 'LFW': dataset_LFW,
                'CFP_FP': dataset_CFP_FP, 'AgeDB30': dataset_AgeDB30}
    dataset_sizes = {'train': len(dataset_train), 'LFW': len(dataset_LFW),
                     'CFP_FP': len(dataset_CFP_FP),
                     'AgeDB30': len(dataset_AgeDB30)}
    print('training and validation data loaded')
    return dataloaders, dataset_sizes, datasets
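
# `MXNetReaderPipeline` is defined elsewhere in the repo. Below is a minimal
# sketch of such a pipeline, assuming the older nvidia.dali `ops` API
# (pre-1.0, matching the pipes.build() / epoch_size() calls above); the
# 'train.rec'/'train.idx' file names are assumptions, not confirmed by this
# section.
from nvidia.dali.pipeline import Pipeline
import nvidia.dali.ops as ops
import nvidia.dali.types as types


class MXNetReaderPipeline(Pipeline):
    def __init__(self, path, batch_size, num_threads, device_id, num_gpus):
        super(MXNetReaderPipeline, self).__init__(batch_size, num_threads, device_id)
        # Read the RecordIO shard belonging to this GPU.
        self.input = ops.MXNetReader(path=[path + 'train.rec'],        # assumed file name
                                     index_path=[path + 'train.idx'],  # assumed file name
                                     random_shuffle=True,
                                     shard_id=device_id, num_shards=num_gpus)
        self.decode = ops.ImageDecoder(device='mixed', output_type=types.RGB)
        # Match torchvision's Normalize(0.5, 0.5): mean/std of 0.5 on [0, 1]
        # is 127.5 on [0, 255].
        self.cmnp = ops.CropMirrorNormalize(device='gpu',
                                            output_dtype=types.FLOAT,
                                            output_layout=types.NCHW,
                                            mean=[127.5, 127.5, 127.5],
                                            std=[127.5, 127.5, 127.5])

    def define_graph(self):
        # The reader name 'Reader' matches pipes.epoch_size('Reader') above.
        jpegs, labels = self.input(name='Reader')
        images = self.decode(jpegs)
        return [self.cmnp(images), labels]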
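
# Consuming the DALI iterator differs from a torch DataLoader: each step
# yields a list with one dict per pipeline, keyed by the output_map
# ['data', 'label'] used above. The helper below is a hypothetical sketch of
# that loop; the [B, 1]-float label shape is an assumption.
def _consume_dali_epoch_sketch(dali_loader):
    for batch in dali_loader:
        images = batch[0]['data']                    # already on GPU, NCHW float
        labels = batch[0]['label'].squeeze().long()  # assumed [B, 1] float -> [B] long
        # ... forward/backward pass goes here ...
    dali_loader.reset()  # DALI iterators must be reset at the end of each epoch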
def load_data_dataparallel(batch_size, dataset='Faces_emore'):
    """Build plain PyTorch loaders for single-process / DataParallel training,
    including an 80/20 train/validation split of the training set."""
    # Eval transform: [0, 255] -> [0.0, 1.0] -> [-1.0, 1.0].
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])
    # Training transform with augmentation: flip, bicubic resize to 120x120,
    # random crop back to 112x112, and Cutout.
    train_transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(),
        transforms.Resize((120, 120), interpolation=3),  # 3 == PIL bicubic
        transforms.RandomCrop(112),
        transforms.ToTensor(),
        Cutout(n_holes=1, length=16),
        transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
    ])

    # Verification sets.
    root = '/data/face_dataset/LFW/lfw_align_112'
    file_list = '/data/face_dataset/LFW/pairs.txt'
    dataset_LFW = LFW(root, file_list, transform=transform)

    root = '/data/face_dataset/CFP-FP/CFP_FP_aligned_112'
    file_list = '/data/face_dataset/CFP-FP/cfp_fp_pair.txt'
    dataset_CFP_FP = CFP_FP(root, file_list, transform=transform)

    root = '/data/face_dataset/AgeDB-30/agedb30_align_112'
    file_list = '/data/face_dataset/AgeDB-30/agedb_30_pair.txt'
    dataset_AgeDB30 = AgeDB30(root, file_list, transform=transform)

    # Training set.
    if dataset == 'CASIA':
        root = 'data_set/CASIA_Webface_Image'
        file_list = 'data_set/CASIA_Webface_Image/webface_align_112.txt'
        dataset_train = CASIAWebFace(root, file_list, transform=transform)
    elif dataset == 'Faces_emore':
        root = '/data/face_dataset/imgs'
        file_list = '/data/face_dataset/imgs/faces_emore_align_112.txt'
        dataset_train = MS1M(root, file_list, transform=train_transform)
        # train_sampler = torch.utils.data.distributed.DistributedSampler(dataset_train)
    else:
        raise ValueError('unknown training dataset: {}'.format(dataset))

    # 80/20 train/validation split of the training set.
    dataset_size = len(dataset_train)
    train_size = int(0.8 * dataset_size)
    valid_size = dataset_size - train_size
    train_subset, valid_subset = torch.utils.data.random_split(
        dataset_train, [train_size, valid_size])

    # 'train_dataset': data.DataLoader(dataset_train, batch_size=batch_size, sampler=train_sampler),
    dataloaders = {
        'train_dataset': data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True),
        'train_subset': data.DataLoader(train_subset, batch_size=batch_size, shuffle=True),
        'valid_subset': data.DataLoader(valid_subset, batch_size=batch_size, shuffle=True),
        'LFW': data.DataLoader(dataset_LFW, batch_size=batch_size, shuffle=False),
        'CFP_FP': data.DataLoader(dataset_CFP_FP, batch_size=batch_size, shuffle=False),
        'AgeDB30': data.DataLoader(dataset_AgeDB30, batch_size=batch_size, shuffle=False),
    }
    datasets = {'train_dataset': dataset_train, 'train_subset': train_subset,
                'valid_subset': valid_subset, 'LFW': dataset_LFW,
                'CFP_FP': dataset_CFP_FP, 'AgeDB30': dataset_AgeDB30}
    dataset_sizes = {'train': len(dataset_train),
                     'train_subset': len(train_subset),
                     'valid_subset': len(valid_subset),
                     'LFW': len(dataset_LFW),
                     'CFP_FP': len(dataset_CFP_FP),
                     'AgeDB30': len(dataset_AgeDB30)}
    print('training and validation data loaded')
    return dataloaders, dataset_sizes, datasets
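
# `Cutout` is used in train_transform above. A minimal sketch of the standard
# Cutout augmentation (DeVries & Taylor, 2017), operating on a CHW tensor as
# produced by ToTensor():
import numpy as np


class Cutout(object):
    """Randomly mask out `n_holes` square patches of side `length`."""

    def __init__(self, n_holes, length):
        self.n_holes = n_holes
        self.length = length

    def __call__(self, img):
        h, w = img.size(1), img.size(2)
        mask = np.ones((h, w), np.float32)
        for _ in range(self.n_holes):
            # Pick a random center and zero out the surrounding square,
            # clipped to the image bounds.
            y, x = np.random.randint(h), np.random.randint(w)
            y1, y2 = np.clip(y - self.length // 2, 0, h), np.clip(y + self.length // 2, 0, h)
            x1, x2 = np.clip(x - self.length // 2, 0, w), np.clip(x + self.length // 2, 0, w)
            mask[y1:y2, x1:x2] = 0.0
        mask = torch.from_numpy(mask).expand_as(img)  # broadcast over channels
        return img * mask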
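
# A minimal usage sketch, assuming the dataset paths above exist and that the
# training datasets yield (image, label) pairs; the batch size is illustrative.
if __name__ == '__main__':
    dataloaders, dataset_sizes, datasets = load_data_dataparallel(batch_size=128)
    print(dataset_sizes)
    for images, labels in dataloaders['train_subset']:
        # images: [B, 3, 112, 112] floats in [-1, 1]; labels: [B] class ids.
        print(images.shape, labels.shape)
        break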