def script_recognition(cf):
    """Train and evaluate a script-recognition classifier driven by config `cf`.

    Builds the image-transform pipeline, the train/test datasets and loaders,
    a model via make_model, then runs an SGD training loop with periodic
    evaluation.  Returns (result, train_set, test_set, train_loader,
    test_loader) for inspection from a console.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    # the_augmentor = TheAugmentor(probability=.5, grid_width=3,
    #     grid_height=3, magnitude=8)
    sheer_tsfm = transforms.RandomAffine(0, shear=(-30, 10))
    random_sheer = transforms.RandomApply(
        [sheer_tsfm], p=0.7)  # will only be used if cf.use_distortion_augmentor is True
    # Conditional pipeline: each stage collapses to NoneTransform() when its
    # config flag is off.
    image_transform = transforms.Compose([
        ImageThinning(p=cf.thinning_threshold)
        if cf.thinning_threshold < 1 else NoneTransform(),
        random_sheer if cf.use_distortion_augmentor else NoneTransform(),
        OverlayImage() if cf.overlay_handwritting_on_STL_img else NoneTransform(
        ),  # Add random image background here, to mimic scenetext, or, let's call it scenehandwritten
        # transforms.Normalize( (0.5, 0.5, 0.5), (0.25, 0.25 , 0.25) ) if cf.normalize_images else NoneTransform(),
        PadImage((cf.MAX_IMAGE_WIDTH, cf.MAX_IMAGE_HEIGHT))
        if cf.pad_images else NoneTransform(),
        # NOTE(review): transforms.Scale was removed from torchvision long ago
        # (replaced by transforms.Resize) — confirm the pinned torchvision
        # version still provides it.
        transforms.Scale(cf.input_size) if cf.resize_images else NoneTransform(),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.repeat(3, 1, 1))
        if not cf.overlay_handwritting_on_STL_img else NoneTransform(
        ),  # this is becuase the overlay produces an RGB image
    ])
    #
    if cf.dataset_name == 'WG+IFN':
        print(
            '...................IFN & WG datasets ---- The multi-lingual PHOCNET'
        )
        train_set = WG_IFN_Dataset(cf, train=True, transform=image_transform)
        # Test split is the complement of the training indices.
        test_set = WG_IFN_Dataset(cf,
                                  train=False,
                                  transform=image_transform,
                                  data_idx_WG=train_set.data_idx_WG,
                                  data_idx_IFN=train_set.data_idx_IFN,
                                  complement_idx=True)
    elif cf.dataset_name == 'IAM+IFN':
        print(
            '................... IAM & IFN datasets ---- The multi-lingual PHOCNET'
        )
        # NOTE(review): train=True is paired with mode='test' here — looks
        # inconsistent; confirm against IAM_IFN_Dataset's semantics.
        train_set = IAM_IFN_Dataset(
            cf, train=True, mode='test', transform=image_transform
        )  # mode is one of train, test, or validate
        test_set = IAM_IFN_Dataset(
            cf, train=False, mode='validate',
            transform=image_transform,  # loading iam valid set for testing
            data_idx_IFN=train_set.data_idx_IFN,
            complement_idx=True)
    else:
        exit('only works for WG+IFN and IAM+IFN script recognition')
    # plt.imshow(train_set[29][0], cmap='gray'); plt.show()
    if cf.use_weight_to_balance_data:
        print('Adding weights to balance the data')
        # train_set = add_weights_of_words(train_set)
        train_set.add_weights_of_words()
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=cf.batch_size_train,
                                               shuffle=cf.shuffle,
                                               num_workers=cf.num_workers)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=cf.batch_size_test,
                                              shuffle=False,
                                              num_workers=cf.num_workers)
    model = make_model(
        cf.model_name,
        pretrained=cf.pretrained,
        num_classes=train_set.num_classes(),
        input_size=cf.input_size,
        dropout_p=cf.dropout_probability,
    )
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    # Dampening is incompatible with Nesterov momentum, hence the conditional.
    optimizer = optim.SGD(
        model.parameters(),
        lr=cf.learning_rate,
        momentum=cf.momentum,
        nesterov=cf.use_nestrov_moment,
        weight_decay=cf.weight_decay,
        dampening=cf.damp_moment if not (cf.use_nestrov_moment) else 0)
    print('--- Total no. of params in model ', count_model_parameters(model),
          '-------')

    def train(epoch):
        # One full pass over train_loader; logs a running average loss.
        total_loss = 0
        total_size = 0
        model.train()
        for batch_idx, (data, target, word_str,
                        weight) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            total_loss += loss.item()
            total_size += data.size(0)
            loss.backward()
            optimizer.step()
            if batch_idx % cf.batch_log == 0:
                print(
                    'Train Epoch: {} [{}/{} ({:.0f}%)]\tAverage loss: {:.7f}'.
                    format(epoch, batch_idx * len(data),
                           len(train_loader.dataset),
                           100. * batch_idx / len(train_loader),
                           total_loss / total_size))

    def test(test_loader):
        # Evaluation pass: accuracy only.  NOTE(review): test_loss is printed
        # but never updated — the loss computation below is commented out, so
        # the reported "Average loss" is always 0.
        model.eval()
        test_loss = 0
        correct = 0
        pred_all = torch.tensor([], dtype=torch.float32, device=device)
        target_all = torch.tensor([], dtype=torch.float32, device=device)
        word_str_all = ()
        with torch.no_grad():
            for data, target, word_str, weight in test_loader:  # weight is trivial here
                data, target = data.to(device), target.to(device)
                output = model(data)
                ''' loss = criterion(output.float(), target.float())
                loss = criterion(output, target )
                test_loss += loss.item() '''
                # NOTE(review): F.sigmoid is deprecated (torch.sigmoid), and
                # torch.cuda.LongTensor breaks the CPU path even though a CPU
                # device is selected above — confirm CUDA is always available.
                output = F.sigmoid(output)
                target = target.type(torch.cuda.LongTensor)
                pred = output.data.max(1, keepdim=True)[1]
                # pred = pred.type(torch.cuda.DoubleTensor)
                correct += pred.eq(
                    target.data.view_as(pred)).long().cpu().sum().item()
        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)'.
              format(
                  test_loss, correct,
                  len(test_loader.dataset) * pred.size()[1],
                  100. * correct / (len(test_loader.dataset) * pred.size()[1])))
        return 0  # to be used in case we want to try different distances later

    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer,
                                                     cf.lr_milestones,
                                                     gamma=cf.lr_gamma)
    # print('PHOC length', train_set.)
    print('Chance level performance \n')
    test(test_loader)  # nice to know the performance prior to training
    for epoch in range(1, cf.epochs + 1):
        # NOTE(review): calling scheduler.step() before train() is the
        # pre-PyTorch-1.1 ordering; on >=1.1 it should follow optimizer steps.
        scheduler.step()
        print("lr = ", scheduler.get_lr(), end="")  # to be used with MultiStepLR
        train(epoch)
        if not (epoch % cf.testing_print_frequency):
            test(test_loader)
    result = test(test_loader)
    return result, train_set, test_set, train_loader, test_loader  # returned to be checked from command console, this is provisary
if image.shape[-1] == 4: image = torch.tensor(rgba2rgb(image),dtype=torch.float32) else: image = torch.tensor(image,dtype=torch.float32) return image.permute(2,0,1) def load_mask(path): mask = imread(path) mask = torch.tensor(rgb2gray(mask),dtype=torch.float32) return mask #%% generate data Rotate_90 = transforms.RandomApply(torch.nn.ModuleList([transforms.RandomRotation((90,90)),]),p=0.5) Rotate_180 = transforms.RandomApply(torch.nn.ModuleList([transforms.RandomRotation((180,180)),]),p=0.5) Rotate_270 = transforms.RandomApply(torch.nn.ModuleList([transforms.RandomRotation((270,270)),]),p=0.5) transformer = transforms.Compose([transforms.RandomCrop(config.input_size), transforms.RandomHorizontalFlip(p=0.5), transforms.RandomVerticalFlip(p=0.5), Rotate_90, Rotate_180, Rotate_270]) def gen_data(image,mask): data = torch.cat((image, mask.unsqueeze(0)), dim=0)
"translate": transforms.RandomAffine(5, translate=(0, 0.1)), "shear": transforms.RandomAffine(5, shear=5), "pad": transforms.Compose( (transforms.Pad(5, padding_mode="reflect"), transforms.RandomCrop(224))) # "rotation": transforms.RandomRotation(30), # "random_crop": transforms.RandomCrop(224, padding = 4), # "horizontal_flip": transforms.RandomHorizontalFlip(p = 0.5), # "vertical_flip": transforms.RandomVerticalFlip(p = 0.5), } cutout_transform = cutout(mask_size=16, p=1., cutout_inside=True) random_apply = transforms.RandomApply(list(transformations.values())) class MSIDataset(Dataset): def __init__(self, dataset, data_dir, data_mode, augmentation=None): # Dataset will have form (ImageName, label) self.dataset = dataset self.augmentation = augmentation self.data_dir = data_dir self.data_mode = data_mode self.int_labels = None self.str_to_int() # Necessary transformation self.to_tensor = transforms.ToTensor()
def get_train_transforms(self, method, dataset):
    """Returns the training torchvision transformations for each dataset/method.

    If a new method or dataset is added, this file should be modified
    accordingly.

    Args:
        method: The name of the method.
        dataset: The name of the dataset.

    Returns:
        train_transform: An object of type torchvision.transforms.

    Raises:
        Exception: If `method` is not supported.
    """
    self._check(dataset)
    # Fix: the "tiny", "slim" and "dldataset" branches previously left
    # `cutout` unbound, raising NameError when method == "finetune".
    # scale=(0.0, 0.0) makes RandomErasing a no-op for those datasets.
    cutout = 0.0
    if (dataset == "cifar10"):
        normalize = transforms.Normalize(mean=[0.491, 0.482, 0.447],
                                         std=[0.247, 0.243, 0.262])
        side = 32
        padding = 4
        cutout = 0.25
    elif (dataset == "stl10"):
        normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.5, 0.5, 0.5])
        side = 96
        padding = 12
        cutout = 0.111
    elif (dataset == "cifar100" or dataset == "supercifar100"):
        normalize = transforms.Normalize(mean=[0.507, 0.487, 0.441],
                                         std=[0.267, 0.256, 0.276])
        side = 32
        padding = 4
        cutout = 0.0625
    elif (dataset == "tiny"):
        #Image-Net --> mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.25, 0.25, 0.25])
        side = 64
        padding = 8
    elif (dataset == "slim"):
        #Image-Net --> mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        normalize = transforms.Normalize(mean=[0.5, 0.5, 0.5],
                                         std=[0.25, 0.25, 0.25])
        side = 64
        padding = 8
    elif (dataset == "dldataset"):
        #Image-Net --> mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        normalize = transforms.Normalize(mean=[0.4837, 0.4531, 0.4015],
                                         std=[0.2212, 0.2165, 0.2156])
        side = 96
        padding = 12
    if (method == "relationnet" or method == "simclr"):
        # SimCLR-style pipeline: strong jitter + grayscale + resized crop.
        color_jitter = transforms.ColorJitter(brightness=0.8,
                                              contrast=0.8,
                                              saturation=0.8,
                                              hue=0.2)
        rnd_color_jitter = transforms.RandomApply([color_jitter], p=0.8)
        rnd_gray = transforms.RandomGrayscale(p=0.2)
        rnd_resizedcrop = transforms.RandomResizedCrop(
            size=side,
            scale=(0.08, 1.0),
            ratio=(0.75, 1.3333333333333333),
            interpolation=2)
        rnd_hflip = transforms.RandomHorizontalFlip(p=0.5)
        #rnd_rot = transforms.RandomRotation(10., resample=2),
        train_transform = transforms.Compose([
            rnd_resizedcrop, rnd_hflip, rnd_color_jitter, rnd_gray,
            transforms.ToTensor(), normalize
        ])
    elif (method == "deepinfomax"):
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ])
    elif (method == "standard" or method == "rotationnet"
          or method == "deepcluster"):
        train_transform = transforms.Compose([
            transforms.RandomCrop(side, padding=padding),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ])
    elif (method == "finetune"):
        # Mild affine jitter, applied half of the time.
        rnd_affine = transforms.RandomApply([
            transforms.RandomAffine(18,
                                    scale=(0.9, 1.1),
                                    translate=(0.1, 0.1),
                                    shear=10,
                                    resample=Image.BILINEAR,
                                    fillcolor=0)
        ], p=0.5)
        train_transform = transforms.Compose(
            [  #transforms.RandomCrop(side, padding=padding),
                transforms.RandomHorizontalFlip(),
                rnd_affine,
                transforms.ToTensor(),
                normalize,
                #transforms.RandomErasing(p=0.5, scale=(0.02, 0.33))]) #pytorch default
                transforms.RandomErasing(p=0.5,
                                         scale=(cutout, cutout),
                                         ratio=(1.0, 1.0))
            ])
    elif (method == "lineval"):
        train_transform = transforms.Compose(
            [transforms.ToTensor(), normalize])
    else:
        raise Exception("[ERROR] The method " + str(method) +
                        " is not supported!")
    return train_transform
def __init__(self, svhn_path, frac=0.5, shuffle=True, augment=True, use_cuda=False, dload_dataset=False): """ frac : float fraction of dataset to use for training """ self.net = Net_Full() normalize = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) augcolor = [ transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5) ] augaffine = [ transforms.RandomAffine(20, scale=(0.9, 1.1), shear=20, resample=PIL.Image.BICUBIC, fillcolor=(100, 100, 100)) ] augtrans = transforms.Compose([ transforms.RandomApply(augcolor, p=0.8), transforms.RandomApply(augaffine, p=0.8), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) if augment: transform = normalize else: transform = augtrans trainset = datasets.SVHN(svhn_path, split='train', transform=transform, target_transform=None, download=dload_dataset) self.trainset = trainset testset = datasets.SVHN(svhn_path, split='test', transform=normalize, target_transform=None, download=dload_dataset) self.testset = testset trainset_size = len(self.trainset) indices = list(range(trainset_size)) end = int(np.floor(frac * trainset_size)) if shuffle: np.random.shuffle(indices) train_indices = indices[:end] self.train_sampler = SubsetRandomSampler(train_indices) if use_cuda: if torch.cuda.is_available(): self.device = torch.device('cuda') else: print("CUDA not available") self.device = torch.device('cpu') else: self.device = torch.device('cpu') self.net.to(self.device)
# 设置数据集路径 split_dir = os.path.join("data", "cat_dog_split") train_dir = os.path.join(split_dir, "train") valid_dir = os.path.join(split_dir, "valid") test_dir = os.path.join(split_dir, "test") # 设置数据预处理和数据增强方法 norm_mean = [0.485, 0.456, 0.406] norm_std = [0.229, 0.224, 0.225] train_transform = transforms.Compose([ transforms.Resize((224, 224)), transforms.RandomHorizontalFlip(p=0.5), transforms.RandomGrayscale(p=0.1), transforms.RandomApply([transforms.RandomCrop(200, padding=24, padding_mode='reflect')], p=0.2), transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize(norm_mean, norm_std), ]) test_transform = transforms.Compose([ transforms.Resize((224, 224)), transforms.ToTensor(), transforms.Normalize(norm_mean, norm_std), ]) # 构建dataset train_data = CatDogDataset(data_dir=train_dir, transform=train_transform) valid_data = CatDogDataset(data_dir=valid_dir, transform=test_transform) test_data = CatDogDataset(data_dir=test_dir, transform=test_transform)
def load_transforms(name):
    """Load data transformations.

    Note:
    - Gaussian Blur is defined at the bottom of this file.
    """
    key = name.lower()
    if key == "default":
        pipeline = transforms.Compose([
            transforms.RandomCrop(32, padding=8),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
    elif key in ("cifar", "svhn"):
        # Both 32x32 datasets share the same SimCLR-style recipe.
        jitter = transforms.RandomApply(
            [transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8)
        pipeline = transforms.Compose([
            transforms.RandomResizedCrop(32),
            transforms.RandomHorizontalFlip(p=0.5),
            jitter,
            transforms.RandomGrayscale(p=0.2),
            transforms.ToTensor(),
        ])
    elif key == "mnist":
        affine_choice = transforms.RandomChoice([
            transforms.RandomAffine((-90, 90)),
            transforms.RandomAffine(0, translate=(0.2, 0.4)),
            transforms.RandomAffine(0, scale=(0.8, 1.1)),
            transforms.RandomAffine(0, shear=(-20, 20)),
        ])
        pipeline = transforms.Compose([
            affine_choice,
            GaussianBlur(kernel_size=3),
            transforms.ToTensor(),
        ])
    elif key == "stl10":
        pipeline = transforms.Compose([
            transforms.RandomResizedCrop(96),
            transforms.RandomHorizontalFlip(),
            transforms.RandomApply(
                [transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            GaussianBlur(kernel_size=9),
            transforms.ToTensor(),
        ])
    elif key in ("fashionmnist", "fmnist"):
        # Same affine menu as MNIST, plus a flip and a free rotation first.
        affine_choice = transforms.RandomChoice([
            transforms.RandomAffine((-90, 90)),
            transforms.RandomAffine(0, translate=(0.2, 0.4)),
            transforms.RandomAffine(0, scale=(0.8, 1.1)),
            transforms.RandomAffine(0, shear=(-20, 20)),
        ])
        pipeline = transforms.Compose([
            transforms.RandomHorizontalFlip(p=0.5),
            transforms.RandomRotation((-90, 90)),
            affine_choice,
            GaussianBlur(kernel_size=3),
            transforms.ToTensor(),
        ])
    elif key == "test":
        pipeline = transforms.ToTensor()
    else:
        raise NameError("{} not found in transform loader".format(name))
    return pipeline
def imagenet_dataloader(args, dataset_paths):
    ''' Loads the ImageNet or TinyImageNet dataset performing augmentaions.
    Generates splits of the training set to produce a validation set.
    args:
        args (dict): Program/commandline arguments.
        dataset_paths (dict): Paths to each datset split.
    Returns:
        dataloaders (): pretrain,train,valid,train_valid,test set split dataloaders.
    '''
    # guassian_blur from https://github.com/facebookresearch/moco/
    guassian_blur = transforms.RandomApply([GaussianBlur(args.blur_sigma)],
                                           p=args.blur_p)
    # SimCLR-style colour jitter; strength scaled by args.jitter_d.
    color_jitter = transforms.ColorJitter(0.8 * args.jitter_d,
                                          0.8 * args.jitter_d,
                                          0.8 * args.jitter_d,
                                          0.2 * args.jitter_d)
    rnd_color_jitter = transforms.RandomApply([color_jitter], p=args.jitter_p)
    rnd_grey = transforms.RandomGrayscale(p=args.grey_p)
    # Base train and test augmentaions (ImageNet normalisation statistics).
    transf = {
        'train':
        transforms.Compose([
            transforms.RandomResizedCrop((args.crop_dim, args.crop_dim)),
            rnd_color_jitter, rnd_grey, guassian_blur,
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225))
        ]),
        'test':
        transforms.Compose([
            transforms.CenterCrop((args.crop_dim, args.crop_dim)),
            transforms.ToTensor(),
            transforms.Normalize((0.485, 0.456, 0.406),
                                 (0.229, 0.224, 0.225))
        ])
    }
    config = {'train': True, 'test': False}
    # Transform-free folders; transforms are applied later by CustomDataset.
    datasets = {i: ImageFolder(root=dataset_paths[i]) for i in config.keys()}
    # weighted sampler weights for full(f) training set
    f_s_weights = sample_weights(datasets['train'].targets)
    # return data, labels dicts for new train set and class-balanced valid set
    # 50 is the num of samples to be split into the val set for each class (1000)
    data, labels = random_split_image_folder(
        data=np.asarray(datasets['train'].samples),
        labels=datasets['train'].targets,
        n_classes=args.n_classes,
        n_samples_per_class=np.repeat(50, args.n_classes).reshape(-1))
    # torch.from_numpy(np.stack(labels)) this takes the list of class ids and turns them to tensor.long
    # original full training set
    datasets['train_valid'] = CustomDataset(
        data=np.asarray(datasets['train'].samples),
        labels=torch.from_numpy(np.stack(datasets['train'].targets)),
        transform=transf['train'],
        two_crop=args.twocrop)
    # original test set
    datasets['test'] = CustomDataset(data=np.asarray(datasets['test'].samples),
                                     labels=torch.from_numpy(
                                         np.stack(datasets['test'].targets)),
                                     transform=transf['test'],
                                     two_crop=False)
    # make new pretraining set without validation samples
    datasets['pretrain'] = CustomDataset(data=np.asarray(data['train']),
                                         labels=labels['train'],
                                         transform=transf['train'],
                                         two_crop=args.twocrop)
    # make new finetuning set without validation samples
    datasets['train'] = CustomDataset(data=np.asarray(data['train']),
                                      labels=labels['train'],
                                      transform=transf['train'],
                                      two_crop=False)
    # make class balanced validation set for finetuning
    datasets['valid'] = CustomDataset(data=np.asarray(data['valid']),
                                      labels=labels['valid'],
                                      transform=transf['test'],
                                      two_crop=False)
    # weighted sampler weights for new training set
    s_weights = sample_weights(datasets['pretrain'].labels)
    # NOTE(review): 'train' reuses the pretrain-set weights (same underlying
    # samples), while 'train_valid' uses the full-set weights — confirm this
    # is intentional.
    config = {
        'pretrain':
        WeightedRandomSampler(s_weights,
                              num_samples=len(s_weights),
                              replacement=True),
        'train':
        WeightedRandomSampler(s_weights,
                              num_samples=len(s_weights),
                              replacement=True),
        'train_valid':
        WeightedRandomSampler(f_s_weights,
                              num_samples=len(f_s_weights),
                              replacement=True),
        'valid': None,
        'test': None
    }
    if args.distributed:
        # Distributed runs replace the weighted samplers entirely.
        config = {
            'pretrain': DistributedSampler(datasets['pretrain']),
            'train': DistributedSampler(datasets['train']),
            'train_valid': DistributedSampler(datasets['train_valid']),
            'valid': None,
            'test': None
        }
    dataloaders = {
        i: DataLoader(datasets[i],
                      sampler=config[i],
                      num_workers=8,
                      pin_memory=True,
                      drop_last=True,
                      batch_size=args.batch_size)
        for i in config.keys()
    }
    return dataloaders
def cifar_dataloader(args, dataset_paths):
    ''' Loads the CIFAR10 or CIFAR100 dataset performing augmentaions.
    Generates splits of the training set to produce a validation set.
    args:
        args (dict): Program/commandline arguments.
        dataset_paths (dict): Paths to each datset split.
    Returns:
        dataloaders (): pretrain,train,valid,train_valid,test set split dataloaders.
    Raises:
        ValueError: If args.dataset is neither 'cifar10' nor 'cifar100'.
    '''
    # SimCLR-style colour jitter; strength scaled by args.jitter_d.
    color_jitter = transforms.ColorJitter(0.8 * args.jitter_d,
                                          0.8 * args.jitter_d,
                                          0.8 * args.jitter_d,
                                          0.2 * args.jitter_d)
    rnd_color_jitter = transforms.RandomApply([color_jitter], p=args.jitter_p)
    rnd_grey = transforms.RandomGrayscale(p=args.grey_p)
    # Base train and test augmentaions (CIFAR normalisation statistics).
    transf = {
        'train':
        transforms.Compose([
            transforms.ToPILImage(), rnd_color_jitter, rnd_grey,
            transforms.RandomResizedCrop((args.crop_dim, args.crop_dim),
                                         scale=(0.25, 1.0)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.49139968, 0.48215841, 0.44653091),
                                 (0.24703223, 0.24348513, 0.26158784))
        ]),
        'pretrain':
        transforms.Compose([
            transforms.ToPILImage(), rnd_color_jitter, rnd_grey,
            transforms.RandomResizedCrop((args.crop_dim, args.crop_dim)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.49139968, 0.48215841, 0.44653091),
                                 (0.24703223, 0.24348513, 0.26158784))
        ]),
        'test':
        transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.49139968, 0.48215841, 0.44653091),
                                 (0.24703223, 0.24348513, 0.26158784))
        ])
    }
    config = {'train': True, 'test': False}
    if args.dataset == 'cifar10':
        datasets = {
            i: CIFAR10(root=dataset_paths[i],
                       transform=transf[i],
                       train=config[i],
                       download=True)
            for i in config.keys()
        }
        val_samples = 500
    elif args.dataset == 'cifar100':
        datasets = {
            i: CIFAR100(root=dataset_paths[i],
                        transform=transf[i],
                        train=config[i],
                        download=True)
            for i in config.keys()
        }
        val_samples = 100
    else:
        # Fix: an unsupported dataset previously fell through and crashed
        # later with NameError on `datasets`/`val_samples`; fail fast instead.
        raise ValueError(
            "Unsupported dataset '{}': expected 'cifar10' or 'cifar100'".format(
                args.dataset))
    # weighted sampler weights for full(f) training set
    f_s_weights = sample_weights(datasets['train'].targets)
    # return data, labels dicts for new train set and class-balanced valid set
    # 500 is the num of samples to be split into the val set for each class (10)
    data, labels = random_split(data=datasets['train'].data,
                                labels=datasets['train'].targets,
                                n_classes=args.n_classes,
                                n_samples_per_class=np.repeat(
                                    val_samples, args.n_classes).reshape(-1))
    # save original full training set
    datasets['train_valid'] = datasets['train']
    # make new pretraining set without validation samples
    datasets['pretrain'] = CustomDataset(data=data['train'],
                                         labels=labels['train'],
                                         transform=transf['pretrain'],
                                         two_crop=args.twocrop)
    # make new finetuning set without validation samples
    datasets['train'] = CustomDataset(data=data['train'],
                                      labels=labels['train'],
                                      transform=transf['train'],
                                      two_crop=False)
    # make class balanced validation set for finetuning
    datasets['valid'] = CustomDataset(data=data['valid'],
                                      labels=labels['valid'],
                                      transform=transf['test'],
                                      two_crop=False)
    # weighted sampler weights for new training set
    s_weights = sample_weights(datasets['pretrain'].labels)
    config = {
        'pretrain':
        WeightedRandomSampler(s_weights,
                              num_samples=len(s_weights),
                              replacement=True),
        'train':
        WeightedRandomSampler(s_weights,
                              num_samples=len(s_weights),
                              replacement=True),
        'train_valid':
        WeightedRandomSampler(f_s_weights,
                              num_samples=len(f_s_weights),
                              replacement=True),
        'valid': None,
        'test': None
    }
    if args.distributed:
        # Distributed runs replace the weighted samplers entirely.
        config = {
            'pretrain': DistributedSampler(datasets['pretrain']),
            'train': DistributedSampler(datasets['train']),
            'train_valid': DistributedSampler(datasets['train_valid']),
            'valid': None,
            'test': None
        }
    dataloaders = {
        i: DataLoader(datasets[i],
                      sampler=config[i],
                      num_workers=8,
                      pin_memory=True,
                      drop_last=True,
                      batch_size=args.batch_size)
        for i in config.keys()
    }
    return dataloaders
import random import torchvision.transforms as transforms from PIL import ImageFilter class GaussianBlur(object): """Gaussian blur augmentation in SimCLR https://arxiv.org/abs/2002.05709""" def __init__(self, sigma=[.1, 2.]): self.sigma = sigma def __call__(self, x): sigma = random.uniform(self.sigma[0], self.sigma[1]) x = x.filter(ImageFilter.GaussianBlur(radius=sigma)) return x single_frame_augmentation = [ transforms.RandomResizedCrop(112, scale=(0.4, 1.)), transforms.RandomApply([ transforms.ColorJitter(0.4, 0.4, 0.4, 0.1) # not strengthened ], p=0.8), transforms.RandomGrayscale(p=0.2), transforms.RandomApply([GaussianBlur([.1, 2.])], p=0.5), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ]
def main_worker(gpu, ngpus_per_node, args):
    """Per-process entry point for (distributed) MoCo training.

    Sets up process-group/logging, builds the MoCo model and optimizer,
    optionally resumes from a checkpoint, constructs the augmented ImageFolder
    loader, and runs the epoch loop with periodic checkpointing.
    """
    args.gpu = gpu
    # suppress printing if not master
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    global logger
    # Falls back to rank 0 when no process group has been initialised.
    try:
        rank = dist.get_rank()
    except:
        rank = 0
    logger = setup_logger(output=args.out_folder,
                          distributed_rank=rank,
                          name="moco")
    if args.multiprocessing_distributed and args.gpu != 0:
        # Non-master workers get a no-op logger.info to keep output clean.
        def print_pass(*args):
            pass
        logger.info = print_pass
    if args.gpu is not None:
        logger.info("Use GPU: {} for training".format(args.gpu))
    logger.info("=> creating model '{}'".format(args.arch))
    assert args.model_version in moco_names, "{} not implemented, valid choice : {}".format(
        args.model_version, "|".join(moco_names))
    if args.arch.endswith("W"):
        raise NotImplementedError
    else:
        model = moco.__dict__[args.model_version](models.__dict__[args.arch],
                                                  args.moco_dim, args.moco_k,
                                                  args.moco_m, args.moco_t,
                                                  args.mlp)
    logger.info("Start training with: ")
    logger.info(args)
    logger.info("Commit Hash is: ")
    # Best-effort: the git hash is informational only.
    try:
        logger.info(get_git_revision_hash())
    except:
        logger.info("Not inside a git repo")
    logger.info(model)
    print("Set GPU device: {}".format(args.gpu))
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        # raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        raise NotImplementedError("Only DistributedDataParallel is supported.")
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            logger.info("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            logger.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            logger.info("=> no checkpoint found at '{}'".format(args.resume))
    cudnn.benchmark = True
    # Data loading code
    traindir = os.path.join(args.data, 'train')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomApply([
                transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
            ], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])],
                                   p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize
        ]
    if args.outer_shuffle:
        raise NotImplementedError
    else:
        # Each sample yields k random crops of the same image.
        train_dataset = datasets.ImageFolder(
            traindir,
            MultiCropsTransform(transforms.Compose(augmentation),
                                k_crops=args.k_crops))
    if args.dataset == "imagenet100":
        raise NotImplementedError
    if args.max_imgs_per_cls > 0:
        raise NotImplementedError
    # Per-class image counts, derived from the parent directory names.
    class_stats = [
        os.path.basename(os.path.dirname(m[0])) for m in train_dataset.imgs
    ]
    logger.info("Training data stats")
    logger.info(Counter(class_stats).most_common())
    if args.distributed:
        if args.outer_shuffle:
            raise NotImplementedError
        elif args.repeat_datasets > 0:
            raise NotImplementedError
        else:
            train_sampler = torch.utils.data.distributed.DistributedSampler(
                train_dataset)
    else:
        train_sampler = None
    if args.outer_shuffle:
        raise NotImplementedError
    else:
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=args.batch_size,
            shuffle=(train_sampler is None),
            num_workers=args.workers,
            pin_memory=True,
            sampler=train_sampler,
            drop_last=True)
    if args.amp:
        raise NotImplementedError
    else:
        scaler = None
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Re-seed the sampler so each epoch sees a different shard order.
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args,
              scaler=scaler)
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            # Only checkpoint on the interval, plus the last 5 epochs.
            if epoch % args.save_interval != 0 and epoch < args.epochs - 5:
                continue
            out_folder = args.out_folder
            os.makedirs(out_folder, exist_ok=True)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                },
                is_best=False,
                filename=os.path.join(
                    out_folder, 'checkpoint_{:04d}.pth.tar'.format(epoch)))
def __init__(self): self.trans = transforms.RandomApply([lambda x: hflip(x)], p=0.5)
project_out = args.pro_out linear_in = args.linear_in eval_routine = args.eval_routine record_cnn = {"train_loss": []} record_clf = {"train_loss": [], "train_acc": [], "test_loss": [], "test_acc": []} # ========== [data] ========== train_aug = transforms.Compose([ transforms.ToPILImage(), transforms.RandomHorizontalFlip(p=0.5), transforms.RandomResizedCrop(size=32), transforms.RandomApply([transforms.ColorJitter(brightness=0.8 * aug_s, contrast=0.8 * aug_s, saturation=0.8 * aug_s, hue=0.2 * aug_s)], p=0.8), transforms.RandomGrayscale(p=0.2), transforms.ToTensor()] ) trainset = ImageDataset( root_dir=train_root, class_file=classFile, transforms=train_aug ) trainloader = DataLoader( trainset, batch_size=batch_size, shuffle=True,
WD_EXCLUDE_BN_BIAS=True, ), CHECKPOINT_PERIOD=10, IMS_PER_BATCH=1024, IMS_PER_DEVICE=128, # 8 gpus per node BATCH_SUBDIVISIONS=1, # Simulate Batch Size 4096 ), INPUT=dict(AUG=dict(TRAIN_PIPELINES=dict( q=[ ("RepeatList", dict(transforms=[ ("Torch_Compose", transforms.Compose([ transforms.RandomResizedCrop(224), transforms.RandomHorizontalFlip(), transforms.RandomApply( [transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8), transforms.RandomGrayscale(p=0.2), ])), ("RandomGaussianBlur", dict(sigma=[.1, 2.], p=1.0)), ("RandomSolarization", dict(p=0.0)), ("Torch_Compose", transforms.Compose([ transforms.ToTensor(), transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) ])), ], repeat_times=1)), ], k=[ ("RepeatList",
def Train():
    """Train the Siamese network with contrastive loss on the configured
    image folders, printing/plotting per-epoch train and test loss, and
    return the trained network.

    NOTE(review): relies on module-level `Config`, `SiameseNetwork`,
    `ContrastiveLoss`, `SiameseNetworkDataset` and `show_plot`, and
    requires CUDA (unconditional `.cuda()` calls).
    """
    # Fixed seeds for reproducible shuffling/augmentation.
    random.seed(10)
    torch.manual_seed(10)
    # NetSet
    net = SiameseNetwork().cuda()
    criterion = ContrastiveLoss()
    # Only optimize parameters that require gradients.
    optimizer = optim.Adam(filter(lambda p: p.requires_grad, net.parameters()),
                           lr=Config.learning_rate)
    # TrainSet
    folder_dataset = dset.ImageFolder(root=Config.training_dir)
    # NOTE(review): p=0.0 disables the whole augmentation bundle below --
    # confirm this is intentional.
    siamese_dataset = SiameseNetworkDataset(imageFolderDataset=folder_dataset,
                                            transform=transforms.RandomApply([
                                                transforms.RandomHorizontalFlip(p=0.5),
                                                transforms.RandomVerticalFlip(p=0.5),
                                                transforms.RandomResizedCrop((100, 100)),
                                                transforms.RandomRotation(180)], p=0.0),
                                            should_invert=False)
    train_dataloader = DataLoader(siamese_dataset,
                                  num_workers=0,
                                  batch_size=Config.train_batch_size,
                                  shuffle=True)
    # TestSet (no augmentation transform passed).
    folder_dataset_test = dset.ImageFolder(root=Config.testing_dir)
    siamese_dataset = SiameseNetworkDataset(imageFolderDataset=folder_dataset_test,
                                            should_invert=False)
    test_dataloader = DataLoader(siamese_dataset,
                                 num_workers=0,
                                 batch_size=1,
                                 shuffle=True)
    counter = []         # x-axis values for the loss plot
    loss_history = []    # last-batch train loss, one sample per epoch
    loss_test = []       # mean test loss per epoch
    iteration_number = 0
    for epoch in range(0, Config.train_number_epochs):
        for i, data in enumerate(train_dataloader, 0):
            img_str0, img_str1, img0, img1, label = data
            img0, img1, label = img0.cuda(), img1.cuda(), label.cuda()
            optimizer.zero_grad()
            output1, output2 = net(img0, img1)
            loss_contrastive = criterion(output1, output2, label)
            loss_contrastive.backward()
            optimizer.step()
            # Only on the last batch of the epoch: record/report losses.
            if i == len(train_dataloader) - 1:
                # train loss
                print("Epoch number {}\n Train loss {}".format(epoch, loss_contrastive.item()))
                iteration_number += 10
                counter.append(iteration_number)
                loss_history.append(loss_contrastive.item())
                # test loss
                # NOTE(review): evaluation runs without net.eval() or
                # torch.no_grad() -- gradients are built then discarded;
                # confirm this is intentional.
                loss_t = 0
                for i, data in enumerate(test_dataloader, 0):
                    img_str0, img_str1, x0, x1, label = data
                    x0, x1, label = x0.cuda(), x1.cuda(), label.cuda()
                    output1, output2 = net(x0, x1)
                    loss_contrastive = criterion(output1, output2, label)
                    loss_t += loss_contrastive.item()
                print(" Test loss {}\n".format(loss_t / len(test_dataloader)))
                loss_test.append(loss_t / len(test_dataloader))
    show_plot(counter, loss_history, loss_test)
    return net
def get_dataloaders(augmentations, batch=1024, kfold=0, loss_type='icl', get_train=False):
    """Build (train_loader, val_loader) for a 5-fold split of the dataset.

    Args:
        augmentations: list of augmentations applied to the data (wrapped in
            an `Augmentation` and inserted at the front of the train pipeline).
        batch: batch size for both loaders.
        kfold: which of the 5 folds (0-4) to hold out as validation.
        loss_type: 'icl' enables the ICL augmentation pipeline and wraps the
            train transform in TwoCropsTransform.
        get_train: whether to also build the train loader (otherwise it is
            returned as None). Use this when loading the data to train linear
            classifiers, or when loading the final classifier.

    Returns:
        (train_loader, val_loader); train_loader is None when get_train is
        False.

    NOTE(review): reads module-level `args`, `traindir`, `transformations`,
    `_CIFAR_MEAN`, `_CIFAR_STD`, `Augmentation`, `moco` and -- in the
    get_train branch -- `train_sampler`, which is not defined in this
    function; presumably a module-level global, verify against the caller.
    """
    if args.dataid == "imagenet":
        train_dataset = datasets.ImageFolder(traindir, transformations)
    elif args.dataid == "cifar10" or args.dataid == "svhn":
        # THe default training transforms we use when training the CIFAR10 network.
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(28, scale=(0.2, 1.)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
        ])
        # If we're evaluating ICL, its only fair to do so with the ICL augmentations
        if loss_type == "icl":
            random_resized_crop = transforms.RandomResizedCrop(28, scale=(0.2, 1.))
            if 'rrc' in args.base:
                # Crop-only variant of the ICL pipeline.
                transform_train = transforms.Compose([
                    random_resized_crop,
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD)
                ])
            else:
                # Full MoCo-v2-style pipeline: jitter, grayscale, blur, flip.
                transform_train = transforms.Compose([
                    random_resized_crop,
                    transforms.RandomApply(
                        [
                            transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
                        ], p=0.8),
                    transforms.RandomGrayscale(p=0.2),
                    transforms.RandomApply(
                        [moco.loader.GaussianBlur([.1, 2.])], p=0.5),
                    transforms.RandomHorizontalFlip(),
                    transforms.ToTensor(),
                    transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD),
                ])
        # Insert the new transforms in to the training transforms.
        transform_train.transforms.insert(0, Augmentation(augmentations))
        # Use the twocrops transform.
        if loss_type == "icl":
            transform_train = moco.loader.TwoCropsTransform(transform_train)
    else:
        raise NotImplementedError(
            "Support for the following dataset is not yet implemented: {}".
            format(args.dataid))
    if get_train:
        # NOTE(review): always CIFAR10 here, even when args.dataid == "svhn"
        # -- confirm intent.
        train_dataset = torchvision.datasets.CIFAR10(args.data,
                                                     transform=transform_train,
                                                     download=True)
    # In FAA They use Train Transform as well.
    if args.dataid == "cifar10":
        val_dataset = torchvision.datasets.CIFAR10(args.data,
                                                   transform=transform_train,
                                                   download=True)
    elif args.dataid == "svhn":
        val_dataset = torchvision.datasets.SVHN(args.data,
                                                transform=transform_train,
                                                download=True)
    if get_train:
        # Deterministic 5-way split; drop the held-out fold from training.
        torch.manual_seed(1337)
        lengths = [len(train_dataset) // 5] * 5
        folds = torch.utils.data.random_split(train_dataset, lengths)
        folds.pop(kfold)
        train_dataset = torch.utils.data.ConcatDataset(folds)
    # Same seed so the validation split aligns with the train split; the
    # last fold absorbs the remainder so the lengths sum to the dataset size.
    torch.manual_seed(1337)
    lengths = [len(val_dataset) // 5] * 5
    lengths[-1] = int(lengths[-1] + (len(val_dataset) - np.sum(lengths)))
    # print(lengths)
    folds = torch.utils.data.random_split(val_dataset, lengths)
    val_dataset = folds[kfold]
    if get_train:
        train_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=batch,
            shuffle=(train_sampler is None),
            num_workers=8,
            pin_memory=True,
            sampler=train_sampler,
            drop_last=True)
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=batch,
                                             shuffle=True,
                                             num_workers=4,
                                             pin_memory=True,
                                             drop_last=False,
                                             sampler=None)
    if not get_train:
        train_loader = None
    return train_loader, val_loader
def get_transform(name, args):
    """Build a single torchvision transform from its name and arguments.

    Args:
        name: transform name -- the generic "Lambda", one of the combinators
            ("RandomChoice", "RandomOrder", "RandomApply", whose
            args["transforms"] sub-config is expanded via `config_transform`),
            or a plain torchvision transform class name.
        args: dict of keyword arguments forwarded to the constructor.
            "RandomApply" additionally reads args['p'].

    Returns:
        The constructed transform object.

    Raises:
        NotImplementedError: if `name` is not a supported transform.
    """
    # Names constructed directly as T.<Name>(**args).
    simple = {
        # transform on tensor + PIL image
        "ColorJitter", "Grayscale", "Pad", "RandomAffine", "RandomGrayscale",
        "RandomHorizontalFlip", "RandomPerspective", "RandomResizedCrop",
        "RandomRotation", "RandomVerticalFlip", "Resize", "GaussianBlur",
        # transform on tensor only
        "Normalize", "RandomErasing", "ConvertImageDtype", "ToPILImage",
    }
    #
    # GENERIC
    #
    if name == "Lambda":
        t = T.Lambda(**args)
    #
    # TRANSFORM MULTI ON IMGS ONLY
    #
    elif name == "RandomChoice":
        tt = config_transform(args["transforms"])
        t = T.RandomChoice(tt)
    elif name == "RandomOrder":
        tt = config_transform(args["transforms"])
        t = T.RandomOrder(tt)
    #
    # TRANSFORM MULTI ON TENSOR + PIL IMAGE
    #
    elif name == "RandomApply":
        tt = config_transform(args["transforms"])
        t = T.RandomApply(tt, p=args['p'])
    elif name == "ToTensor":
        # ToTensor takes no constructor arguments; args is ignored (as before).
        t = T.ToTensor()
    elif name in simple:
        t = getattr(T, name)(**args)
    else:
        # BUG FIX: previously `raise NotImplementedError(transform_name)`,
        # but `transform_name` is undefined in this scope, so unknown names
        # crashed with a NameError instead of the intended exception.
        raise NotImplementedError(name)
    return t
    Args: img (PIL Image): Image to be BiChanneled. Returns: PIL Image: BiChanneled image. """
    # NOTE(review): the lines above are the tail of a method cut off before
    # this chunk (its `def` line and docstring opening are not in view).
    gray = F.to_grayscale(img, num_output_channels=1)
    # ent = entropy(np.array(gray),disk(10))
    # out = torch.cat(ent,img[1])
    return gray

# Training pipeline: fixed resize, then (with probability 0.7, passed
# positionally to RandomApply) a rotation + horizontal flip bundle.
transform_train = transforms.Compose([
    transforms.Resize((600, 600)),
    transforms.RandomApply([
        torchvision.transforms.RandomRotation(30),
        transforms.RandomHorizontalFlip()
    ], 0.7),
    transforms.ToTensor()
])
'''
Transform Images to specific size and randomly rotate and flip them
'''
# Merged train+validation CSV drives the training set.
training_set = Dataset(os.path.join(BASE_TRAIN_PATH, 'merged_tr_vl', 'merged_tr_vl.csv'),
                       os.path.join(BASE_TRAIN_PATH, 'merged_tr_vl'),
                       transform=transform_train)
train_generator = data.DataLoader(training_set, **params)
# validation_set = Dataset(os.path.join(BASE_VAL_PATH, 'regular-fundus-validation', 'regular-fundus-validation.csv'),
#                          BASE_VAL_PATH,
#                          transform=transform_train)
def train(args):
    """Train a multi-label classifier (CNN backbone + `clssimp` head) on
    DeepFashion2-style data, optionally mixed with AugMix / stylized /
    DeepAugment variants, and save weights after each epoch.

    NOTE(review): relies on module-level `fashion2loader`, `AugMix`,
    `clssimp`, `se_resnet50`, `bit_models`, `ToLabel`,
    `speckle_noise_torch`, `tqdm`, etc. Original source had its indentation
    collapsed; epoch-level statement placement below is reconstructed.
    """
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    # ImageNet normalization statistics.
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.augmix:
        # AugMix wraps the dataset later, so only PIL-level ops here.
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomResizedCrop((args.img_size), scale=(0.5, 2.0)),
        ])
    elif args.speckle:
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomResizedCrop((args.img_size), scale=(0.5, 2.0)),
            transforms.ToTensor(),
            # Inject speckle noise on the tensor with 50% probability.
            transforms.RandomApply(
                [transforms.Lambda(lambda x: speckle_noise_torch(x))], p=0.5),
            normalize,
        ])
    else:
        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomResizedCrop((args.img_size), scale=(0.5, 2.0)),
            transforms.ToTensor(),
            normalize,
        ])
    if args.cutout:
        train_transform.transforms.append(transforms.RandomErasing())
    val_transform = transforms.Compose([
        # NOTE(review): transforms.Scale is deprecated (removed in newer
        # torchvision); transforms.Resize is the replacement.
        transforms.Scale((args.img_size, args.img_size)),
        transforms.ToTensor(),
        normalize,
    ])
    label_transform = transforms.Compose([
        ToLabel(),
    ])
    print("Loading Data")
    if args.dataset == "deepfashion2":
        loader = fashion2loader(
            "../",
            transform=train_transform,
            label_transform=label_transform,
            #scales=(-1), occlusion=(-1), zoom=(-1), viewpoint=(-1), negate=(True,True,True,True),
            scales=args.scales,
            occlusion=args.occlusion,
            zoom=args.zoom,
            viewpoint=args.viewpoint,
            negate=args.negate,
            #load=True,
        )
        if args.augmix:
            loader = AugMix(loader, args.augmix)
        if args.stylize:
            # Mix in a stylized copy of the dataset.
            style_loader = fashion2loader(
                root="../../stylize-datasets/output/",
                transform=train_transform,
                label_transform=label_transform,
                #scales=(-1), occlusion=(-1), zoom=(-1), viewpoint=(-1), negate=(True,True,True,True),
                scales=args.scales,
                occlusion=args.occlusion,
                zoom=args.zoom,
                viewpoint=args.viewpoint,
                negate=args.negate,
                #load=True,
            )
            loader = torch.utils.data.ConcatDataset([loader, style_loader])
        valloader = fashion2loader(
            "../",
            split="validation",
            transform=val_transform,
            label_transform=label_transform,
            #scales=(-1), occlusion=(-1), zoom=(-1), viewpoint=(-1), negate=(True,True,True,True),
            scales=args.scales,
            occlusion=args.occlusion,
            zoom=args.zoom,
            viewpoint=args.viewpoint,
            negate=args.negate,
        )
    elif args.dataset == "deepaugment":
        # Base data plus two DeepAugment-distorted copies (EDSR, CAE).
        loader = fashion2loader(
            "../",
            transform=train_transform,
            label_transform=label_transform,
            #scales=(-1), occlusion=(-1), zoom=(-1), viewpoint=(-1), negate=(True,True,True,True),
            scales=args.scales,
            occlusion=args.occlusion,
            zoom=args.zoom,
            viewpoint=args.viewpoint,
            negate=args.negate,
            #load=True,
        )
        loader1 = fashion2loader(
            root="../../deepaugment/EDSR/",
            transform=train_transform,
            label_transform=label_transform,
            #scales=(-1), occlusion=(-1), zoom=(-1), viewpoint=(-1), negate=(True,True,True,True),
            scales=args.scales,
            occlusion=args.occlusion,
            zoom=args.zoom,
            viewpoint=args.viewpoint,
            negate=args.negate,
            #load=True,
        )
        loader2 = fashion2loader(
            root="../../deepaugment/CAE/",
            transform=train_transform,
            label_transform=label_transform,
            #scales=(-1), occlusion=(-1), zoom=(-1), viewpoint=(-1), negate=(True,True,True,True),
            scales=args.scales,
            occlusion=args.occlusion,
            zoom=args.zoom,
            viewpoint=args.viewpoint,
            negate=args.negate,
            #load=True,
        )
        loader = torch.utils.data.ConcatDataset([loader, loader1, loader2])
        if args.augmix:
            loader = AugMix(loader, args.augmix)
        if args.stylize:
            style_loader = fashion2loader(
                root="../../stylize-datasets/output/",
                transform=train_transform,
                label_transform=label_transform,
                #scales=(-1), occlusion=(-1), zoom=(-1), viewpoint=(-1), negate=(True,True,True,True),
                scales=args.scales,
                occlusion=args.occlusion,
                zoom=args.zoom,
                viewpoint=args.viewpoint,
                negate=args.negate,
                #load=True,
            )
            loader = torch.utils.data.ConcatDataset([loader, style_loader])
        valloader = fashion2loader(
            "../",
            split="validation",
            transform=val_transform,
            label_transform=label_transform,
            #scales=(-1), occlusion=(-1), zoom=(-1), viewpoint=(-1), negate=(True,True,True,True),
            scales=args.scales,
            occlusion=args.occlusion,
            zoom=args.zoom,
            viewpoint=args.viewpoint,
            negate=args.negate,
        )
    else:
        raise AssertionError
    print("Loading Done")
    n_classes = args.num_classes
    train_loader = data.DataLoader(loader,
                                   batch_size=args.batch_size,
                                   num_workers=args.num_workers,
                                   drop_last=True,
                                   shuffle=True)
    print("number of images = ", len(train_loader))
    print("number of classes = ", n_classes)
    print("Loading arch = ", args.arch)
    # Backbone selection: everything keeps the first 8 children (conv stages)
    # and feeds 2048-dim features to the classifier head.
    if args.arch == "resnet101":
        orig_resnet = torchvision.models.resnet101(pretrained=True)
        features = list(orig_resnet.children())
        model = nn.Sequential(*features[0:8])
        clsfier = clssimp(2048, n_classes)
    elif args.arch == "resnet50":
        orig_resnet = torchvision.models.resnet50(pretrained=True)
        features = list(orig_resnet.children())
        model = nn.Sequential(*features[0:8])
        clsfier = clssimp(2048, n_classes)
    elif args.arch == "resnet152":
        orig_resnet = torchvision.models.resnet152(pretrained=True)
        features = list(orig_resnet.children())
        model = nn.Sequential(*features[0:8])
        clsfier = clssimp(2048, n_classes)
    elif args.arch == "se":
        model = se_resnet50(pretrained=True)
        features = list(model.children())
        model = nn.Sequential(*features[0:8])
        clsfier = clssimp(2048, n_classes)
    elif args.arch == "BiT-M-R50x1":
        model = bit_models.KNOWN_MODELS[args.arch](head_size=2048,
                                                   zero_head=True)
        model.load_from(np.load(f"{args.arch}.npz"))
        features = list(model.children())
        model = nn.Sequential(*features[0:8])
        clsfier = clssimp(2048, n_classes)
    elif args.arch == "BiT-M-R101x1":
        model = bit_models.KNOWN_MODELS[args.arch](head_size=2048,
                                                   zero_head=True)
        model.load_from(np.load(f"{args.arch}.npz"))
        features = list(model.children())
        model = nn.Sequential(*features[0:8])
        clsfier = clssimp(2048, n_classes)
    if args.load == 1:
        # Resume backbone + head from a previous run.
        model.load_state_dict(
            torch.load(args.save_dir + args.arch + str(args.disc) + ".pth"))
        clsfier.load_state_dict(
            torch.load(args.save_dir + args.arch + "clssegsimp" +
                       str(args.disc) + ".pth"))
    gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    use_dataparallel = len(gpu_ids) > 1
    print("using data parallel = ", use_dataparallel, device, gpu_ids)
    if use_dataparallel:
        # Re-index visible devices as 0..n-1 for DataParallel.
        gpu_ids = [int(x) for x in range(len(gpu_ids))]
        model = nn.DataParallel(model, device_ids=gpu_ids)
        clsfier = nn.DataParallel(clsfier, device_ids=gpu_ids)
    model.to(device)
    clsfier.to(device)
    if args.finetune:
        # Head-only training: backbone parameters are not optimized.
        if args.opt == "adam":
            optimizer = torch.optim.Adam([{
                'params': clsfier.parameters()
            }], lr=args.lr)
        else:
            optimizer = torch.optim.SGD(clsfier.parameters(),
                                        args.lr,
                                        momentum=args.momentum,
                                        weight_decay=args.weight_decay,
                                        nesterov=True)
    else:
        if args.opt == "adam":
            # Backbone learns at lr/10, head at lr.
            optimizer = torch.optim.Adam([{
                'params': model.parameters(),
                'lr': args.lr / 10
            }, {
                'params': clsfier.parameters()
            }], lr=args.lr)
        else:
            optimizer = torch.optim.SGD(itertools.chain(
                model.parameters(), clsfier.parameters()),
                args.lr,
                momentum=args.momentum,
                weight_decay=args.weight_decay,
                nesterov=True)

    def cosine_annealing(step, total_steps, lr_max, lr_min):
        # Cosine schedule from lr_max down to lr_min over total_steps.
        return lr_min + (lr_max - lr_min) * 0.5 * (
            1 + np.cos(step / total_steps * np.pi))

    if args.use_scheduler:
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lr_lambda=lambda step: cosine_annealing(
                step,
                args.n_epochs * len(train_loader),
                1,  # since lr_lambda computes multiplicative factor
                1e-6 / (args.lr * args.batch_size / 256.)))
    bceloss = nn.BCEWithLogitsLoss()
    for epoch in range(args.n_epochs):
        for i, (images, labels) in enumerate(tqdm(train_loader)):
            if args.augmix:
                # AugMix datasets yield (mixed, original) image pairs;
                # run both through the network in one batch.
                x_mix1, x_orig = images
                images = torch.cat((x_mix1, x_orig), 0).to(device)
            else:
                images = images[0].to(device)
            labels = labels.to(device).float()
            optimizer.zero_grad()
            outputs = model(images)
            outputs = clsfier(outputs)
            if args.augmix:
                l_mix1, outputs = torch.split(outputs, x_orig.size(0))
            if args.loss == "bce":
                if args.augmix:
                    # Randomly train on either the mixed or original logits.
                    if random.random() > 0.5:
                        loss = bceloss(outputs, labels)
                    else:
                        loss = bceloss(l_mix1, labels)
                else:
                    loss = bceloss(outputs, labels)
            else:
                print("Invalid loss please use --loss bce")
                exit()
            loss.backward()
            optimizer.step()
            if args.use_scheduler:
                scheduler.step()
        print(len(train_loader))
        print("Epoch [%d/%d] Loss: %.4f" % (epoch + 1, args.n_epochs,
                                            loss.data))
        # Checkpoint backbone and head (unwrapping DataParallel if needed).
        save_root = os.path.join(args.save_dir, args.arch)
        if not os.path.exists(save_root):
            os.makedirs(save_root)
        if use_dataparallel:
            torch.save(model.module.state_dict(),
                       os.path.join(save_root, str(args.disc) + ".pth"))
            torch.save(
                clsfier.module.state_dict(),
                os.path.join(save_root,
                             "clssegsimp" + str(args.disc) + ".pth"))
        else:
            torch.save(model.state_dict(),
                       os.path.join(save_root, str(args.disc) + ".pth"))
            torch.save(
                clsfier.state_dict(),
                os.path.join(save_root,
                             'clssegsimp' + str(args.disc) + ".pth"))
def __init__(self, root_path, image_size=512, box_size=512, **kwargs):
    """Index a ShapeBongard-style dataset and build its image transform.

    Each task directory has two sub-folders '1' and '0' with `bong_size`
    (=7) PNGs each; file paths and binary labels are collected per task.

    Args:
        root_path: dataset root containing an 'images' directory.
        image_size: output image size.
        box_size: resize size before center-cropping (defaults to
            image_size when None).
        **kwargs: optional flags -- 'split'/'split_file' to restrict tasks
            via a JSON split file, 'moco' (+ 'aug_plus' for MoCo-v2-style
            augmentation) for two-crop transforms, 'augment' for light
            train-time augmentation.
    """
    # Number of images per class folder within one task.
    self.bong_size = 7
    if box_size is None:
        box_size = image_size
    self.tasks = sorted(os.listdir(os.path.join(root_path, 'images')))
    if kwargs.get('split'):
        # Restrict the task list to one split from the JSON split file.
        path = kwargs.get('split_file')
        if path is None:
            path = os.path.join(root_path.rstrip('/'),
                                'ShapeBongard_FF_split.json')
        split = json.load(open(path, 'r'))
        self.tasks = sorted(split[kwargs['split']])
    self.n_tasks = len(self.tasks)
    task_paths = [os.path.join(root_path, 'images', task)
                  for task in self.tasks]
    self.file_paths = []
    self.labels = []
    for task_path in task_paths:
        # Positive ('1') images first, then negative ('0'), both sorted.
        self.file_paths.extend(
            sorted(glob.glob(os.path.join(task_path, '1', '*.png'))))
        self.labels.extend([1 for _ in range(self.bong_size)])
        self.file_paths.extend(
            sorted(glob.glob(os.path.join(task_path, '0', '*.png'))))
        self.labels.extend([0 for _ in range(self.bong_size)])
    # Sanity checks: every task contributed exactly 2 * bong_size files.
    assert len(self.file_paths) == self.bong_size * 2 * len(task_paths)
    assert len(self.labels) == len(self.file_paths)
    norm_params = {'mean': [0.5], 'std': [0.5]}  # grey-scale to [-1, 1]
    normalize = transforms.Normalize(**norm_params)
    self.use_moco = False
    if kwargs.get('moco'):
        self.use_moco = kwargs['moco']
    if self.use_moco:
        if kwargs.get('aug_plus'):
            # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
            self.transform = TwoCropsTransform(transforms.Compose([
                # transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
                transforms.Resize(image_size),
                transforms.RandomApply([
                    transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
                ], p=0.8),
                transforms.RandomGrayscale(p=0.2),
                transforms.RandomApply([GaussianBlur([.1, 2.])], p=0.5),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]))
        else:
            # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
            self.transform = TwoCropsTransform(transforms.Compose([
                # transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
                transforms.Resize(image_size),
                transforms.RandomGrayscale(p=0.2),
                transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize
            ]))
    else:
        if kwargs.get('augment'):
            # Light train-time augmentation: resize + horizontal flip.
            self.transform = transforms.Compose([
                # transforms.RandomResizedCrop(image_size),
                transforms.Resize(image_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                normalize,
            ])
        else:
            # Deterministic eval transform: resize then center-crop.
            self.transform = transforms.Compose([
                transforms.Resize(box_size),
                transforms.CenterCrop(image_size),
                transforms.ToTensor(),
                normalize,
            ])

    def convert_raw(x):
        # Invert the normalization above: x * std + mean.
        mean = torch.tensor(norm_params['mean']).view(-1).type_as(x)
        std = torch.tensor(norm_params['std']).view(-1).type_as(x)
        return x * std + mean
    self.convert_raw = convert_raw
def main_worker(gpu, ngpus_per_node, args):
    """Per-process entry point for (optionally distributed) MoCo pretraining.

    Sets up the process group, builds the MoCo model and two-crop data
    pipeline, optionally resumes from a checkpoint, then trains for
    args.epochs and periodically saves checkpoints from the master rank.

    Args:
        gpu: GPU index for this process (or None for CPU/default device).
        ngpus_per_node: number of GPUs on this node.
        args: parsed command-line namespace with all training options.
    """
    args.gpu = gpu
    # suppress printing if not master
    if args.multiprocessing_distributed and args.gpu != 0:

        def print_pass(*args):
            pass

        builtins.print = print_pass
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # create model
    print("=> creating model '{}'".format(args.arch))
    model = moco.builder.MoCo(model_names[args.arch], args.moco_dim,
                              args.moco_k, args.moco_m, args.moco_t,
                              args.mlp)
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
        # comment out the following line for debugging
        # raise NotImplementedError("Only DistributedDataParallel is supported.")
    else:
        pass
        # AllGather implementation (batch shuffle, queue update, etc.) in
        # this code only supports DistributedDataParallel.
        # raise NotImplementedError("Only DistributedDataParallel is supported.")
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
    cudnn.benchmark = True
    # Data loading code
    # traindir = args.data  # os.path.join(args.data, 'train')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    if args.aug_plus:
        # MoCo v2's aug: similar to SimCLR https://arxiv.org/abs/2002.05709
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomApply(
                [
                    transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)  # not strengthened
                ],
                p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.RandomApply([moco.loader.GaussianBlur([.1, 2.])],
                                   p=0.5),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]
    else:
        # MoCo v1's aug: the same as InstDisc https://arxiv.org/abs/1805.01978
        augmentation = [
            transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
            transforms.RandomGrayscale(p=0.2),
            transforms.ColorJitter(0.4, 0.4, 0.4, 0.4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(), normalize
        ]
    # Two crops of each image form the query/key pair.
    train_dataset = NIHDataset(root=args.root_dir,
                               ann_file=args.ann_file,
                               transforms=moco.loader.TwoCropsTransform(
                                   transforms.Compose(augmentation)))
    # train_dataset = datasets.ImageFolder(
    #     traindir,
    # )
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler,
                                               drop_last=True)
    if os.path.exists(args.save_path) == False:
        os.mkdir(args.save_path)
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Reshuffle the distributed sampler each epoch.
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)
        # Save every args.save_epoch epochs (skipping epoch 0), from the
        # master rank only in the multiprocessing-distributed case.
        if epoch != 0 and epoch % args.save_epoch == 0:
            filename = os.path.join(
                args.save_path, 'checkpoint_{:04d}.pth.tar'.format(epoch))
            if not args.multiprocessing_distributed or (
                    args.multiprocessing_distributed
                    and args.rank % ngpus_per_node == 0):
                save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'arch': args.arch,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                    },
                    is_best=False,
                    filename=filename)
                print("Model Saved")
def get_dataloaders(train_batchsize, val_batchsize):
    """Build train / train-for-validation / validation loaders from the
    image-folder dataset configured in the module-level `INFO` dict.

    Args:
        train_batchsize: batch size for the augmented training loader.
        val_batchsize: batch size for the val-transform loaders.

    Returns:
        (train_loader, train4val_loader, val_loader,
         num_of_images_by_class, mapping) -- val_loader and mapping are
        currently disabled (None / empty; see commented-out code).
    """
    kwargs = {'num_workers': 20, 'pin_memory': True}
    input_size = INFO['model-info']['input-size']
    base = '{}/{}'.format(os.environ['datadir-base'], INFO['dataset'])
    normalize = T.Normalize(mean=INFO['dataset-info']['normalization']['mean'],
                            std=INFO['dataset-info']['normalization']['std'])
    # NOTE: the p for each RandomApply below is passed positionally.
    transform = {
        'train':
        T.Compose([
            # T.Resize(tuple([int(x*(4/3)) for x in input_size])),  # upscale
            # T.RandomResizedCrop(input_size),  # random crop then resize
            T.RandomHorizontalFlip(0.5),  # random horizontal flip
            T.RandomVerticalFlip(0.5),  # random vertical flip
            T.RandomApply([T.RandomRotation(90)], 0.5),  # random 90/270-degree rotation
            T.RandomApply([T.RandomRotation(180)], 0.25),  # random 180-degree rotation
            # NOTE(review): np.random.random() is evaluated once when this
            # pipeline is built, so each jitter strength is fixed for the
            # whole run rather than resampled per image -- confirm intent.
            T.RandomApply(
                [T.ColorJitter(brightness=np.random.random() / 5 + 0.9)],
                0.5),  # randomly adjust image brightness
            T.RandomApply(
                [T.ColorJitter(contrast=np.random.random() / 5 + 0.9)],
                0.5),  # randomly adjust image contrast
            T.RandomApply(
                [T.ColorJitter(saturation=np.random.random() / 5 + 0.9)],
                0.5),  # randomly adjust image saturation
            T.RandomCrop((input_size)),
            T.ToTensor(),
            normalize
        ]),
        'val':
        T.Compose([
            T.Resize(input_size),  # upscale
            T.ToTensor(),
            normalize
        ])
    }
    # Same folder twice: augmented for training, val-transform for
    # evaluating on the training data.
    train_dset = dset.ImageFolder('{}/{}'.format(base, 'Train'),
                                  transform=transform['train'])
    train4val_dset = dset.ImageFolder('{}/{}'.format(base, 'Train'),
                                      transform=transform['val'])
    # val_dset = dset.ImageFolder('{}/{}'.format(base, 'Val'), transform=transform['val'])
    # Per-class image counts for the training set.
    labels = torch.from_numpy(np.array(train_dset.imgs)[:, 1].astype(int))
    num_of_images_by_class = torch.zeros(len(train_dset.classes))
    for i in range(len(train_dset.classes)):
        num_of_images_by_class[i] = torch.where(
            labels == i, torch.ones_like(labels),
            torch.zeros_like(labels)).sum().item()
    mapping = {}
    # for c in train_dset.classes:
    #     if c in val_dset.classes:
    #         mapping[train_dset.class_to_idx[c]] = val_dset.class_to_idx[c]
    #     else:
    #         mapping[train_dset.class_to_idx[c]] = val_dset.class_to_idx['UNKNOWN']
    # mapping[-1] = val_dset.class_to_idx['UNKNOWN']
    train_len = train_dset.__len__()
    # val_len = val_dset.__len__()
    train_loader = DataLoader(train_dset,
                              batch_size=train_batchsize,
                              sampler=sampler.RandomSampler(range(train_len)),
                              **kwargs)
    train4val_loader = DataLoader(train4val_dset,
                                  batch_size=val_batchsize,
                                  sampler=sampler.RandomSampler(
                                      range(train_len)),
                                  **kwargs)
    # val_loader = DataLoader(val_dset, batch_size=val_batchsize, sampler=sampler.RandomSampler(range(val_len)), **kwargs)
    val_loader = None
    return train_loader, train4val_loader, val_loader, num_of_images_by_class, mapping
def __init__(self, svhn_path, curlfrac=0.5, supfrac=0.5, k=1, shuffle=True, augment=False, use_cuda=False, dload_dataset=False):
    """Set up networks, SVHN datasets/samplers and the labeled/unlabeled
    (supervised/CURL) index split.

    Args:
        svhn_path: root directory of the SVHN data.
        curlfrac: fraction of the train set used unlabeled (CURL).
        supfrac: fraction used labeled (supervised); curlfrac + supfrac
            must not exceed 1 (otherwise both reset to 0.5).
        k: number of contrastive samples, forwarded to the CURL dataset.
        shuffle: shuffle indices before splitting.
        augment: use the augmentation transform for the supervised set.
        use_cuda: move networks to GPU when available.
        dload_dataset: download SVHN if missing.
    """
    self.k = k
    self.softplus = nn.Softplus()
    self.bulk = Net_Bulk()
    self.head = Net_Head()
    # Plain preprocessing: tensor + [-1, 1] normalization.
    normalize = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    augcolor = [
        transforms.ColorJitter(brightness=0.5,
                               contrast=0.5,
                               saturation=0.5,
                               hue=0.5)
    ]
    augaffine = [
        transforms.RandomAffine(20,
                                scale=(0.9, 1.1),
                                shear=20,
                                resample=PIL.Image.BICUBIC,
                                fillcolor=(100, 100, 100))
    ]
    # Stochastic augmentation (each part applied with p=0.8).
    augtrans = transforms.Compose([
        transforms.RandomApply(augcolor, p=0.8),
        transforms.RandomApply(augaffine, p=0.8),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    # Contrastive-view transform: jitter + affine always applied.
    contrasttrans = transforms.Compose([
        transforms.ColorJitter(brightness=0.5,
                               contrast=0.5,
                               saturation=0.5,
                               hue=0.5),
        transforms.RandomAffine(20,
                                scale=(0.9, 1.1),
                                shear=20,
                                resample=PIL.Image.BICUBIC,
                                fillcolor=(100, 100, 100)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    if augment:
        transform = augtrans
    else:
        transform = normalize
    self.suptrainset = datasets.SVHN(svhn_path,
                                     split='train',
                                     transform=transform,
                                     target_transform=None,
                                     download=dload_dataset)
    self.testset = datasets.SVHN(svhn_path,
                                 split='test',
                                 transform=normalize,
                                 target_transform=None,
                                 download=dload_dataset)
    if curlfrac + supfrac > 1.0:
        print("CURL fraction plus SUP fraction cannot exceed 1")
        print("Setting to defaults")
        curlfrac, supfrac = 0.5, 0.5
    # Split [0, end) into an unlabeled (CURL) prefix and labeled suffix.
    trainset_size = len(self.suptrainset)
    indices = list(range(trainset_size))
    end = int(np.floor((curlfrac + supfrac) * trainset_size))
    curlend = int(np.floor(curlfrac / (supfrac + curlfrac) * end))
    if shuffle:
        np.random.shuffle(indices)
    curltrain_indices = indices[:curlend]
    suptrain_indices = indices[curlend:end]
    self.curltrain_indices = curltrain_indices
    print(f"Number of labeled images: {len(suptrain_indices)}")
    print(f"Number of unlabeled images: {len(curltrain_indices)}")
    self.suptrain_sampler = SubsetRandomSampler(suptrain_indices)
    self.curltrain_sampler = SubsetRandomSampler(curltrain_indices)
    #self.curltrainset = ContrastedData(svhn_path, split='train', accepted_indices=curltrain_indices, contrast_transform=contrasttrans, k=k, transform=transform, download=dload_dataset)
    self.curltrainset = ApproxContrastedData(svhn_path,
                                             split='train',
                                             contrast_transform=contrasttrans,
                                             k=k,
                                             transform=normalize,
                                             download=dload_dataset)
    if use_cuda:
        if torch.cuda.is_available():
            self.device = torch.device('cuda')
        else:
            print("CUDA not available")
            self.device = torch.device('cpu')
    else:
        self.device = torch.device('cpu')
    # One (initially empty) bucket per SVHN class.
    self.approxclasses = []
    for i in range(10):
        self.approxclasses.append([])
    self.bulk.to(self.device)
    self.head.to(self.device)
def get_train_test_dataset(name, augmented):
    """Return (train, test) torchvision datasets for a named benchmark.

    Args:
        name: one of 'mnist', 'fashion-mnist', 'cifar10'.
        augmented: when truthy, the train split applies its random
            augmentation bundle with a dataset-specific probability
            (0.7 / 0.8 / 0.9); when falsy, the probability is 0 (no-op).

    Returns:
        (train, test) dataset pair; the test split is never augmented.

    Raises:
        NotImplementedError: for any unsupported dataset name.
    """
    if name == 'mnist':
        print('Wow such mnist')
        p = 0.7 if augmented else 0
        train = MNIST(
            '../data/mnist',
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.RandomApply(
                    # [transforms.RandomAffine(degrees=10, translate=(0, 0.1))],
                    [transforms.RandomRotation(10)],
                    p),
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ]),
        )
        test = MNIST('../data/mnist',
                     train=False,
                     download=True,
                     transform=transforms.Compose([
                         transforms.ToTensor(),
                         transforms.Normalize((0.1307, ), (0.3081, ))
                     ]))
    elif name == 'fashion-mnist':
        print('Wow such fashion')
        p = 0.8 if augmented else 0
        train = FashionMNIST(
            '../data/fashion-mnist',
            train=True,
            download=True,
            transform=transforms.Compose([
                transforms.RandomApply(
                    [
                        transforms.RandomRotation(10)
                        # , transforms.RandomHorizontalFlip(0.5)
                    ],
                    p),
                transforms.ToTensor(),
                transforms.Normalize((0.1307, ), (0.3081, ))
            ]))
        test = FashionMNIST('../data/fashion-mnist',
                            train=False,
                            download=True,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                                transforms.Normalize((0.1307, ), (0.3081, ))
                            ]))
    elif name == 'cifar10':
        p = 0.9 if augmented else 0
        print('Wow such cifar10')
        train = CIFAR10('../data/cifar10',
                        train=True,
                        download=True,
                        transform=transforms.Compose([
                            transforms.RandomApply([
                                transforms.RandomCrop(32, padding=4),
                                transforms.RandomAffine(degrees=5),
                                transforms.RandomHorizontalFlip()
                            ], p),
                            transforms.ToTensor(),
                            transforms.Normalize(
                                (0.49139968, 0.48215841, 0.44653091),
                                (0.24703223, 0.24348513, 0.26158784))
                        ]))
        test = CIFAR10('../data/cifar10',
                       train=False,
                       download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize(
                               (0.49139968, 0.48215841, 0.44653091),
                               (0.24703223, 0.24348513, 0.26158784))
                       ]))
    else:
        # BUG FIX: was `raise NotImplemented` -- NotImplemented is a sentinel
        # singleton, not an exception, so raising it produced a TypeError.
        raise NotImplementedError(name)
    return train, test
import torch
from torchvision import transforms
from torch.utils.data import Dataset
import cv2

# Preprocessing pipelines keyed by split name.
data_transforms = {
    # Train: geometric + photometric augmentation, 96x96 random crop.
    'train':
    transforms.Compose([
        transforms.ToPILImage(),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.RandomCrop(96, padding_mode='symmetric'),
        transforms.RandomRotation(45),
        # NOTE(review): ColorJitter with all parameters 0 is a no-op --
        # confirm whether real jitter strengths were intended.
        transforms.RandomApply([
            transforms.ColorJitter(
                brightness=0, contrast=0, saturation=0, hue=0),
        ], p=0.8),
        # Grayscale the whole image 30% of the time.
        transforms.RandomApply([
            transforms.RandomGrayscale(p=1.0),
        ], p=0.3),
        transforms.RandomApply([
            transforms.RandomAffine(
                degrees=30, translate=(0, 0.2), scale=(0.9, 1), fillcolor=0)
        ], p=0.8),
        transforms.ToTensor()
    ]),
    # Test: no augmentation, just PIL round-trip to tensor.
    'test':
    transforms.Compose([transforms.ToPILImage(),
                        transforms.ToTensor()])
    # NOTE(review): this chunk is truncated here -- the closing brace of
    # data_transforms lies outside this view.
# -*- coding: utf-8 -*- # Author: Ji Yang <*****@*****.**> # License: MIT import random import numpy as np from PIL import Image from torch.utils.data import Dataset from torchvision import transforms padding = transforms.Compose([ transforms.Pad(20, padding_mode='reflect'), transforms.RandomRotation((-6, 6)), transforms.RandomApply([transforms.RandomAffine(0, shear=6)]), transforms.RandomCrop(128) ]) rescaling = transforms.Compose([ transforms.Resize(128), transforms.RandomApply([transforms.RandomAffine(0, shear=6)]), transforms.RandomRotation((-6, 6)) ]) crop_rescaling = transforms.Compose([ transforms.RandomCrop(84), transforms.Resize(128), transforms.RandomRotation((-6, 6)) ])
]) augcolor = [ transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.5) ] augaffine = [ transforms.RandomAffine(20, scale=(0.9, 1.1), shear=20, resample=PIL.Image.BICUBIC, fillcolor=(100, 100, 100)) ] augtrans = transforms.Compose([ transforms.RandomApply(augcolor, p=0.8), transforms.RandomApply(augaffine, p=0.8), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) ]) head = Net_Head() bulk = Net_Bulk() curltrainer = curl.CURL(datasets.SVHN, 10, svhn_path, bulk, head, curlloss=softplus, labeledfrac=float(labeledfrac),
def build_transforms(cfg, is_train=True):
    """Assemble the image transform pipeline described by ``cfg.INPUT``.

    Args:
        cfg: project config node; only the ``INPUT`` subtree is read.
        is_train: when True, build the augmented training pipeline;
            otherwise a plain resize + to-tensor test pipeline.

    Returns:
        A ``T.Compose`` with the enabled transforms in this fixed order:
        autoaug -> resize -> flip -> pad/crop -> color jitter -> affine
        -> augmix -> to-tensor -> random erasing -> random patch.
    """
    inp = cfg.INPUT

    if not is_train:
        # Test path: deterministic resize only (interpolation=3 is bicubic).
        return T.Compose([T.Resize(inp.SIZE_TEST, interpolation=3), ToTensor()])

    pipeline = []
    if inp.DO_AUTOAUG:
        pipeline.append(T.RandomApply([AutoAugment()], p=inp.AUTOAUG_PROB))
    pipeline.append(T.Resize(inp.SIZE_TRAIN, interpolation=3))
    if inp.DO_FLIP:
        pipeline.append(T.RandomHorizontalFlip(p=inp.FLIP_PROB))
    if inp.DO_PAD:
        # Pad then crop back to the training size for translation jitter.
        pipeline += [
            T.Pad(inp.PADDING, padding_mode=inp.PADDING_MODE),
            T.RandomCrop(inp.SIZE_TRAIN),
        ]
    if inp.CJ.ENABLED:
        jitter = T.ColorJitter(inp.CJ.BRIGHTNESS, inp.CJ.CONTRAST,
                               inp.CJ.SATURATION, inp.CJ.HUE)
        pipeline.append(T.RandomApply([jitter], p=inp.CJ.PROB))
    if inp.DO_AFFINE:
        # Scale-only affine jitter; grey (128) fill for exposed borders.
        pipeline.append(
            T.RandomAffine(degrees=0, translate=None, scale=[0.9, 1.1],
                           shear=None, resample=False, fillcolor=128))
    if inp.DO_AUGMIX:
        pipeline.append(AugMix(prob=inp.AUGMIX_PROB))
    pipeline.append(ToTensor())
    if inp.REA.ENABLED:
        pipeline.append(T.RandomErasing(p=inp.REA.PROB, value=inp.REA.VALUE))
    if inp.RPT.ENABLED:
        pipeline.append(RandomPatch(prob_happen=inp.RPT.PROB))
    return T.Compose(pipeline)
# NOTE(review): fragment — these are the trailing keyword arguments of a
# dict(...)-style config whose opening lies before the visible region.
# Optimizer hyper-parameters.
BASE_LR=0.3, MOMENTUM=0.9, WEIGHT_DECAY=1e-6, WEIGHT_DECAY_NORM=1e-6, ),
    CHECKPOINT_PERIOD=10,
    # Global batch of 256 split as 32 images per device (8 devices implied).
    IMS_PER_BATCH=256,
    IMS_PER_DEVICE=32, ),
INPUT=dict(
    AUG=dict(
        # Contrastive-learning style pipeline, repeated twice to produce
        # two augmented views per image.
        TRAIN_PIPELINES=[
            ("RepeatList", dict(transforms=[
                ("Torch_Compose", transforms.Compose([
                    transforms.RandomResizedCrop(224, scale=(0.08, 1.)),
                    transforms.RandomApply([
                        transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8),
                ])),
                ("GaussianBlur", dict(sigma=[.1, 2.], p=0.5)),
                ("Torch_Compose", transforms.Compose([
                    transforms.RandomGrayscale(p=0.2),
                    transforms.RandomHorizontalFlip(),
                ]))
            ], repeat_times=2)),
        ],
    )
),
# Output directory derived from this config file's location under SelfSup/.
OUTPUT_DIR=osp.join(
    '/data/Outputs/model_logs/cvpods_playground/SelfSup',
    osp.split(osp.realpath(__file__))[0].split("SelfSup/")[-1]))
# FIXME(review): str.replace returns a new string; this call discards its
# result, so spaces in time1_str are NOT replaced. Likely intended:
#   time1_str = time1_str.replace(' ', '_')
time1_str.replace(' ', '_')
# Per-run checkpoint directory named after the start time.
opt.outf = './checkpoints/model_' + time1_str
try:
    os.makedirs(opt.outf)
except OSError:
    # Directory may already exist; best-effort creation.
    pass

# Pin training to GPUs 2 and 3.
os.environ["CUDA_VISIBLE_DEVICES"] = "2,3"

######################################################################
# Load Data
# ---------
# Training pipeline: resize to 384x128, then (p=0.5) pad-and-crop jitter,
# random horizontal flip, tensor conversion, ImageNet normalization.
transform_train_list = [
    # transforms.RandomResizedCrop(size=128, scale=(0.75,1.0), ratio=(0.75,1.3333), interpolation=3), #Image.BICUBIC)
    transforms.Resize((384, 128), interpolation=3),  # resize
    transforms.RandomApply([transforms.Pad(10),\
        transforms.RandomCrop((384, 128))], p=0.5),
    # transforms.RandomGrayscale(p=0.2),
    # transforms.RandomCrop((256,128)),
    transforms.RandomHorizontalFlip(),  # randomly horizon flip image
    transforms.ToTensor(),  # convert PIL image or numpy.ndarray to tensor
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    # [m1,m2...mn][s1,s2...sn] for n channels
]

# Second variant: resize + normalize only, no stochastic augmentation.
# NOTE(review): fragment is truncated mid-Normalize call below.
transform_train_list2 = [
    # transforms.RandomResizedCrop(size=128, scale=(0.75,1.0), ratio=(0.75,1.3333), interpolation=3), #Image.BICUBIC)
    transforms.Resize((384, 128), interpolation=3),  # resize
    # transforms.RandomGrayscale(p=0.2),
    # transforms.RandomCrop((256,128)),
    # transforms.RandomHorizontalFlip(),#randomly horizon flip image
    transforms.ToTensor(),  # convert PIL image or numpy.ndarray to tensor
    transforms.Normalize(
        [0.485, 0.456, 0.406],