class TorchVisionTransformerComposition(DataTransformation):
    possible_transforms = {
        'crop': lambda shape: transforms.Lambda(
            lambda x: transforms.functional.crop(x, *shape)),
        'reshape': lambda shape: transforms.Resize(shape[-3:-1]),  # (..., H, W, C) -> (H, W)
        'float': lambda _: transforms.Lambda(lambda x: x.float()),
        'torch': lambda _: transforms.ToTensor(),
        'normalize': lambda _: transforms.Lambda(lambda x: x / 255.)
    }

    @staticmethod
    def unpack(transform_name_list, shape: Optional[Iterable[int]] = None):
        transforms_list = []
        for t in transform_name_list:
            try:
                transforms_list.append(
                    TorchVisionTransformerComposition.possible_transforms[t](shape))
            except KeyError as e:
                raise NotImplementedError(f'Transformation {t} not available') from e
        return transforms.Compose(transforms_list)

    def __init__(self, transform_list: List[str], shape: Optional[Iterable[int]] = None):
        self.transforms = TorchVisionTransformerComposition.unpack(transform_list, shape)

    def transform(self, data):
        # `data` is expected to be a NumPy array (Image.fromarray).
        img = Image.fromarray(data)
        if self.transforms is not None:
            img = self.transforms(img)
        return img
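# Usage sketch for the composition above (hypothetical; assumes numpy is
# imported alongside torchvision.transforms and PIL as in the class). Note
# that 'torch' (ToTensor) already scales uint8 images to [0, 1], so adding
# 'normalize' would divide by 255 a second time.
import numpy as np

composition = TorchVisionTransformerComposition(transform_list=['torch', 'float'])
dummy = np.zeros((64, 64, 3), dtype=np.uint8)
tensor = composition.transform(dummy)  # float tensor of shape [3, 64, 64]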
def get_transform(self):
    normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(1.0, 1.0, 1.0))
    if self.mode == "train":
        return transforms.Compose([
            # RandomCrop(500),
            transforms.ToTensor(),
            normalize,
        ])
    elif self.mode == "val":
        return transforms.Compose([
            # transforms.CenterCrop(500),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        # Test: five-crop TTA; stack the five crops into one [5, C, H, W] tensor.
        return transforms.Compose([
            FiveCrop(500),
            transforms.Lambda(
                lambda crops: [transforms.ToTensor()(crop) for crop in crops]),
            transforms.Lambda(
                lambda crops: torch.stack([normalize(crop) for crop in crops])),
        ])
def get_transform_val(self, size):
    normalize = transforms.Normalize([0.485, 0.456, 0.406],
                                     [0.229, 0.224, 0.225])
    if self.crop in ('five', 'multi'):
        transform_val = [
            transforms.Resize(int(size[0] * 1.14)),
            transforms.FiveCrop(size),
            transforms.Lambda(lambda crops: torch.stack(
                [transforms.ToTensor()(crop) for crop in crops])),
            transforms.Lambda(lambda crops: torch.stack(
                [normalize(crop) for crop in crops])),
        ]
    else:
        transform_val = [
            transforms.Resize(int(size[0] * 1.14)),
            transforms.CenterCrop(size),
            transforms.ToTensor(),
            normalize,
        ]
    return transforms.Compose(transform_val)
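# Standalone sketch of the five-crop pattern used above. FiveCrop returns a
# 5-tuple of PIL crops, so the Lambda/stack steps are needed to produce one
# [5, C, H, W] tensor per image; callers typically fold the crop dimension
# into the batch and average the logits afterwards.
import torch
from PIL import Image
from torchvision import transforms

tf = transforms.Compose([
    transforms.Resize(int(224 * 1.14)),
    transforms.FiveCrop(224),
    transforms.Lambda(lambda crops: torch.stack(
        [transforms.ToTensor()(c) for c in crops])),
])
out = tf(Image.new('RGB', (300, 300)))
assert out.shape == (5, 3, 224, 224)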
def __init__(self, root, annotation_paths, frames_per_clip, train_batch_size,
             test_batch_size=0, step_between_clips=1, collate_fn=None):
    self.root = root
    self.annotation_paths = annotation_paths
    self.frames_per_clip = frames_per_clip
    self.train_batch_size = train_batch_size
    self.test_batch_size = test_batch_size
    self.step_between_clips = step_between_clips
    self.collate_fn = collate_fn

    mean = torch.tensor([0.485, 0.456, 0.406]).float()
    std = torch.tensor([0.229, 0.224, 0.225]).float()
    self.transform = transforms.Compose([
        transforms.Lambda(lambda x: x / 255.),                # scale to [0, 1]
        transforms.Lambda(lambda x: x.sub_(mean).div_(std)),  # z-normalization
        transforms.Lambda(lambda x: x.permute(0, 3, 1, 2)),   # (T, H, W, C) -> (T, C, H, W)
        VCenterCrop((224, 224)),
    ])
    self.datasets = []
def app(opt):
    print(opt)

    # Load the MNIST / FashionMNIST dataset; train and val use the same
    # preprocessing (ToTensor followed by a fixed scaling by 32).
    transform = torchvision.transforms.Compose(
        [transforms.ToTensor(), transforms.Lambda(lambda x: x * 32)])
    train_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST(opt.data, train=True, download=True,
                                   transform=transform),
        drop_last=True, batch_size=opt.batch_size, shuffle=True)
    val_loader = torch.utils.data.DataLoader(
        torchvision.datasets.MNIST(opt.data, train=False, download=True,
                                   transform=transform),
        drop_last=True, batch_size=opt.batch_size, shuffle=True)

    model = n3ml.model.Wu2018(batch_size=opt.batch_size,
                              time_interval=opt.time_interval).cuda()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr)
    lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                  milestones=[30, 60, 90])

    best_acc = 0
    for epoch in range(opt.num_epochs):
        start = time.time()
        train_acc, train_loss = train(train_loader, model, criterion, optimizer)
        end = time.time()
        print('total time: {:.2f}s - epoch: {} - accuracy: {} - loss: {}'.format(
            end - start, epoch, train_acc, train_loss))

        val_acc, val_loss = validate(val_loader, model, criterion)
        if val_acc > best_acc:
            best_acc = val_acc
            state = {
                'epoch': epoch,
                'model': model.state_dict(),
                'best_acc': best_acc,
                'optimizer': optimizer.state_dict()
            }  # NOTE: this checkpoint dict is built but never written to disk here.
        print('in test, epoch: {} - best accuracy: {} - loss: {}'.format(
            epoch, best_acc, val_loss))
        lr_scheduler.step()
def forward(self, data, mode='train'):
    ##############
    # BRANCH MODEL
    ##############
    if mode == 'branch':
        data = [data]
    res = []
    if mode == 'head':
        res = data
    else:
        for i in range(len(data)):
            x = self.conv2d_1(data[i])
            x = nn.ReLU()(x)
            x = self.maxpool1(x)  # 96x96x64
            x = self.bn1(x)
            x = self.conv2d_2(x)
            x = nn.ReLU()(x)
            x = self.bn2(x)
            x = self.conv2d_3(x)
            x = nn.ReLU()(x)
            x = self.layer_1(x)
            x = self.layer_2(x)
            x = self.layer_3(x)
            x = self.layer_4(x)
            x = self.globmaxpool2d(x)  # 512
            x = x.view((data[0].shape[0], self.branch_features))
            res.append(x)
    if mode == 'branch':
        return res[0]

    ############
    # HEAD MODEL
    ############
    # Pairwise feature combinations of the two branch embeddings.
    x1 = transforms.Lambda(lambda x: x[0] * x[1])(res)
    x2 = transforms.Lambda(lambda x: x[0] + x[1])(res)
    x3 = transforms.Lambda(lambda x: torch.abs(x[0] - x[1]))(res)
    x4 = transforms.Lambda(lambda x: torch.pow(x, 2))(x3)
    x = torch.cat([x1, x2, x3, x4], dim=1)
    x = x.view((res[0].shape[0], 1, 4, res[0].shape[1]))

    # Per-feature NN with shared weights, implemented as Conv2d with an
    # appropriate stride.
    x = self.conv2d_head_1(x)
    x = nn.ReLU()(x)
    x = x.transpose(1, 2).transpose(2, 3)
    x = self.conv2d_head_2(x)
    x = x.view(x.size()[0], -1)

    # Weighted sum implemented as a dense layer.
    x = self.fc_head(x)
    x = nn.Sigmoid()(x)
    return x
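# Standalone sketch of the four pairwise feature combinations the head uses
# (product, sum, absolute difference, squared difference). transforms.Lambda
# is only a thin callable wrapper, so plain tensor ops behave identically.
import torch

a, b = torch.randn(2, 512), torch.randn(2, 512)
feats = torch.cat([a * b, a + b, (a - b).abs(), (a - b).pow(2)], dim=1)
assert feats.shape == (2, 4 * 512)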
def resize_mask(padded_values, origin_size):
    # Resize the generated mask back to the input image size.
    unpad = tuple(map(lambda x: -x, padded_values))  # negative pads crop
    upsampler = nn.Upsample(size=tuple(reversed(origin_size)),
                            mode='bilinear', align_corners=False)
    m = Compose([
        torch.nn.ZeroPad2d(unpad),                          # undo the padding
        transforms.Lambda(lambda x: upsampler(x.float())),  # back to image size
        transforms.Lambda(lambda x: x.expand(-1, 3, -1, -1) > 0)  # 3-channel bool mask
    ])
    return m
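# Hypothetical usage of resize_mask (assumes torch, torch.nn as nn, and
# torchvision's Compose/transforms imported as in the function above): crop
# a symmetric 10-pixel pad off a 1-channel mask, upsample back to the
# original 256x256 image, and expand to a boolean 3-channel mask.
mask = torch.rand(1, 1, 276, 276)  # padded model output
restore = resize_mask(padded_values=(10, 10, 10, 10), origin_size=(256, 256))
full_mask = restore(mask)          # bool tensor of shape [1, 3, 256, 256]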
def main():
    start_time = time.time()
    args = parse_args()
    default_config = Config()
    args = merge_config(default_config, args)
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    write_dict(vars(args), os.path.join(args.save_dir, 'arguments.csv'))
    torch.manual_seed(args.seed)
    cudnn.benchmark = True
    torch.cuda.manual_seed_all(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device_ids

    train_transformer = transforms.Compose([
        transforms.Resize(size=image_size),
        transforms.ToTensor(),
        transforms.Lambda(normalize)
    ])
    val_transformer = transforms.Compose([
        transforms.Resize(size=image_size),
        transforms.ToTensor(),
        transforms.Lambda(normalize)
    ])
    train_dataset = ChestIUXRayDataset(args.train_finding_file, args.words_file,
                                       args.tags_file, args.image_dir,
                                       transformer=train_transformer)
    val_dataset = ChestIUXRayDataset(args.val_finding_file, args.words_file,
                                     args.tags_file, args.image_dir,
                                     transformer=val_transformer)
    setattr(args, 'dict_size', train_dataset.get_words_size())
    setattr(args, 'max_words', train_dataset.get_config()['MAX_WORDS'])
    setattr(args, 'max_sent', train_dataset.get_config()['MAX_SENT'])
    setattr(args, 'init_embed', train_dataset.get_word_embed())
    display_args(args)
    image_encoder = create_image_encoder(args.tag_size, args.backbone,
                                         args.image_encoder_checkpoint)
    model = AggregationCaptionModel(args)
    train(model, image_encoder, train_dataset, val_dataset, args)
def __init__(self, mean, std, img_folder=None, resize=512):
    self.eval_imgs = [glob.glob(img_folder + "**/*.{}".format(i), recursive=True)
                      for i in ['jpg', 'jpeg', 'png']]
    self.eval_imgs = list(chain.from_iterable(self.eval_imgs))
    assert resize % 8 == 0
    self.resize = resize
    self.transformer = Compose([
        ToTensor(),
        transforms.Lambda(lambda x: x.unsqueeze(0))  # add a batch dimension
    ])
    self.normalizer = Compose([
        transforms.Lambda(lambda x: x.squeeze(0)),
        Normalize(mean=mean, std=std),
        transforms.Lambda(lambda x: x.unsqueeze(0))
    ])
    print("Found {} test images.".format(len(self.eval_imgs)))
def __init__(self, img_folder=None):
    self.eval_imgs = [
        glob.glob(img_folder + "**/*.{}".format(i), recursive=True)
        for i in ['jpg', 'jpeg', 'png']
    ]
    self.eval_imgs = list(chain.from_iterable(self.eval_imgs))
    self.transformer = Compose(
        [ToTensor(), transforms.Lambda(lambda x: x.unsqueeze(0))])
    self.normalizer = Compose([
        transforms.Lambda(lambda x: x.squeeze(0)),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.Lambda(lambda x: x.unsqueeze(0))
    ])
    print("Found {} test images.".format(len(self.eval_imgs)))
def inference(args, net):
    net.eval()
    data_set = YXSOcrDataset(
        args.syn_root,
        transforms=trans,
        target_transforms=transforms.Lambda(lambda x: torch.from_numpy(x)),
        mode='inference')
    data_loader = DataLoader(data_set, batch_size=args.batch_size,
                             shuffle=False, drop_last=False,
                             num_workers=args.workers)
    class_id_list = []
    for sample in tqdm(data_loader):
        image = sample['image'].to(device)
        outputs = net(image)  # [B, N, C]
        _, class_ids = torch.max(outputs, dim=-1)
        class_id_list.append(class_ids.cpu().detach().numpy())
    class_id_np = np.concatenate(class_id_list, axis=0)
    class_name_np = np.vectorize(lambda i: data_set.alpha[i])(class_id_np)

    with codecs.open('answer.{:03d}.csv'.format(args.epochs), mode='w',
                     encoding='utf-8') as writer:
        idx = 0
        for im_path in data_set.image_path_list:
            im = cv2.imread(im_path, 0)
            _, w = im.shape[:2]
            shift = w // 25  # assumes a fixed character width of 25 px
            text = class_name_np[idx:idx + shift]
            writer.write('{},{}\n'.format(
                os.path.splitext(os.path.basename(im_path))[0], ''.join(text)))
            idx += shift
def __getitem__(self, index):
    if self.training:
        return self.X[index], self.y[index]

    # Evaluation: bicubic-downscale the same image by `factor` and by
    # `factor / 2`.
    def downscale(img, factor):
        size = (int(img.size[1] / factor), int(img.size[0] / factor))
        return transforms.ToTensor()(
            transforms.Resize(size, interpolation=Image.BICUBIC)(img))

    img = Image.open(self.root + self.y_dir + self.y[index])
    return downscale(img, self.factor), downscale(img, self.factor / 2)
def __init__(self, resolution: Tuple[int, int]):
    self.transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda X: 2 * X - 1.0),  # rescale to [-1, 1]
        transforms.Resize(resolution),
    ])
def main():
    start_time = time.time()
    args = args_parser()
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    write_dict(vars(args), os.path.join(args.save_dir, 'arguments.csv'))
    torch.manual_seed(args.seed)
    cudnn.benchmark = True
    torch.cuda.manual_seed_all(args.seed)
    os.environ['CUDA_VISIBLE_DEVICES'] = args.device_ids

    train_transformer = transforms.Compose([
        transforms.Resize(size=(256, 256)),
        transforms.RandomCrop(size=image_size),
        transforms.RandomRotation(10),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        transforms.Lambda(normalize)
    ])
    train_dataset = ChestXRay14Dataset(args.image_dir, args.train_file,
                                       train_transformer)

    val_transformer = transforms.Compose([
        transforms.Resize(size=image_size),
        transforms.ToTensor(),
        transforms.Lambda(normalize)
    ])
    val_dataset = ChestXRay14Dataset(args.image_dir, args.val_file,
                                     val_transformer)

    print(vars(args))
    if args.model == 'simple':
        model = simple_mlc_model(train_dataset.get_tag_size(),
                                 backbone=args.backbone)
    else:
        model = mlc_model(train_dataset.get_tag_size(), backbone=args.backbone)
    train(model, train_dataset, val_dataset, args)
def get_basetransform(dataset):
    if dataset in ('cifar10', 'cifar100', 'svhn'):
        normalize = transforms.Normalize(_CIFAR_MEAN, _CIFAR_STD)
        cutout = 16 if 'cifar' in dataset else 20
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.Lambda(lambda x: [x]),  # locate new policy here
            transforms.Lambda(lambda imgs: torch.stack([
                CutoutDefault(cutout)(normalize(transforms.ToTensor()(img)))
                for img in imgs
            ]))
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
        transform_target = lambda target: target
    elif dataset == 'imagenet':
        image_size = 224
        normalize = transforms.Normalize(_IMAGENET_MEAN, _IMAGENET_STD)
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(image_size,
                                         interpolation=Image.BICUBIC),
            transforms.RandomHorizontalFlip(),
            transforms.Lambda(lambda x: x),  # locate new policy here
            transforms.Lambda(lambda imgs: torch.stack(
                [normalize(transforms.ToTensor()(img)) for img in imgs]))
        ])
        transform_test = transforms.Compose([
            transforms.Resize(image_size + 32, interpolation=Image.BICUBIC),
            transforms.CenterCrop(image_size),
            transforms.ToTensor(),
            normalize
        ])
        transform_target = lambda target: target
    else:
        raise ValueError('Unsupported dataset: {}'.format(dataset))
    return transform_train, transform_test, transform_target
def img_open(path):
    data = PIL.Image.open(path)
    # Scale to a fixed height of 32 px, preserving the aspect ratio.
    height = 32
    width = int(data.size[0] / (data.size[1] / height))
    data = data.resize((width, height))
    transform = transforms.Compose([
        transforms.Grayscale(),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: torch.unsqueeze(x, 0))  # add a batch dim
    ])
    return transform(data)
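# Hypothetical usage: for a CRNN-style recognizer, img_open yields a
# [1, 1, 32, W] tensor whose width W tracks the original aspect ratio
# (the path below is illustrative only).
batch = img_open('sample_line.png')
print(batch.shape)  # e.g. torch.Size([1, 1, 32, 128])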
def getLoader(self):
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.view(-1))  # flatten 1x28x28 -> 784
    ])
    trainset = MNIST('.', train=True, download=True, transform=transform)
    testset = MNIST('.', train=False, download=True, transform=transform)
    trainloader = DataLoader(trainset, batch_size=128, shuffle=True)
    testloader = DataLoader(testset, batch_size=128, shuffle=True)
    return trainloader, testloader
def log_sample_images(self, data=None, epoch=-1):
    """Image logger. Saves sample images for visual inspection.

    If `data` is provided, it is treated as real images and logged as-is;
    otherwise, `sample_embeddings` are used to generate images. Images are
    arbitrarily but consistently enumerated: every image gets its own
    directory inside `root_image_dir`, and every epoch its own subdirectory.

    Args:
        data (list of torch.Tensor): optional, real images to be saved.
        epoch (int): number of epochs the generator has been trained.
    """
    # Undo the [-1, 1] normalization and move back to PIL on the CPU.
    transform = transforms.Compose([
        transforms.Lambda(lambda x: x.to('cpu')),
        transforms.Normalize((-1, -1, -1), (2, 2, 2)),
        transforms.ToPILImage()
    ])
    if data is None:  # use sample embeddings
        noise = torch.randn(self.sample_embeddings.size(0), self.noise_dim,
                            device=self.device)
        self.stackgan.eval()
        with torch.no_grad():
            images = self.stackgan.generate(self.sample_embeddings, noise)
        self.stackgan.train()
        for j in range(images[0].size(0)):  # iterate embeddings
            img_dir = os.path.join(self.root_image_dir,
                                   'image_{}'.format(j), str(epoch))
            if not os.path.exists(img_dir):
                os.makedirs(img_dir)
            for i, images_scale in enumerate(images):  # iterate scales
                image = transform(images_scale[j])
                image.save(os.path.join(img_dir, 'scale_{}.jpg'.format(i)))
    else:  # data are real images
        for j, image in enumerate(data):
            img_dir = os.path.join(self.root_image_dir, 'image_{}'.format(j))
            if not os.path.exists(img_dir):
                os.makedirs(img_dir)
            image = transform(image)
            image.save(os.path.join(img_dir, 'real.jpg'))
def __init__(self, root_dir, K=8, image_shape=(256, 256, 3), id_sampling=False,
             is_train=True, random_seed=0, pairs_list=None,
             augmentation_params=None, crop_prob=0.5):
    self.root_dir = root_dir
    self.images = os.listdir(root_dir)
    self.image_shape = tuple(image_shape)
    self.pairs_list = pairs_list
    self.id_sampling = id_sampling
    self.K = K

    if os.path.exists(os.path.join(root_dir, 'train')):
        assert os.path.exists(os.path.join(root_dir, 'test'))
        print("Using the predefined train-test split.")
        if id_sampling:
            train_images = {os.path.basename(image).split('#')[0]
                            for image in
                            os.listdir(os.path.join(root_dir, 'train'))}
            train_images = list(train_images)
        else:
            train_images = os.listdir(os.path.join(root_dir, 'train'))
        test_images = os.listdir(os.path.join(root_dir, 'test'))
        self.root_dir = os.path.join(self.root_dir,
                                     'train' if is_train else 'test')
    else:
        print("Using a random train-test split.")
        train_images, test_images = train_test_split(
            self.images, random_state=random_seed, test_size=0.2)

    self.images = train_images if is_train else test_images
    self.is_train = is_train

    if self.is_train:
        crop = transforms.RandomResizedCrop(image_shape[0],
                                            scale=(0.8, 1.0), ratio=(0.9, 1.1))
        # Apply the random crop with probability `crop_prob`.
        rand_crop = transforms.Lambda(
            lambda x: crop(x) if random.random() < crop_prob else x)
        self.transform = transforms.Compose([
            rand_crop,
            transforms.Resize([image_shape[0], image_shape[1]]),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            # transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ])
    else:
        self.transform = transforms.Compose([
            # transforms.Resize([image_shape[0], image_shape[1]]),
            transforms.ToTensor(),
            # transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
        ])
def get_transforms(name, image_size, support_data_augmentation=None,
                   query_data_augmentation=None, *args, **kwargs):
    """Uses gin to produce the corresponding torchvision transforms.

    Args:
        name: dataset name
        image_size: input image size
        support_data_augmentation: support set DataAugmentation specification
        query_data_augmentation: query set DataAugmentation specification
        *args: consume the rest of the gin arguments
        **kwargs: consume the rest of the gin arguments

    Returns:
        The lists of support and query transforms.
    """
    # Numpy transforms
    support_transforms = []
    query_transforms = []
    if name in ["quickdraw", "omniglot"]:
        size = int(np.ceil(image_size / 32.)) * 32 + 1
        # cv2.resize takes the interpolation as a keyword argument; passing
        # it positionally would bind it to the `dst` parameter instead.
        support_transforms.append(transforms.Lambda(
            lambda im: cv2.resize(im, (size, size),
                                  interpolation=cv2.INTER_CUBIC)))
        query_transforms.append(transforms.Lambda(
            lambda im: cv2.resize(im, (size, size),
                                  interpolation=cv2.INTER_CUBIC)))

    # PIL transforms
    support_transforms += parse_augmentation(support_data_augmentation,
                                             image_size)
    query_transforms += parse_augmentation(query_data_augmentation, image_size)

    # Tensor transforms
    support_transforms.append(transforms.ToTensor())

    return support_transforms, query_transforms
def get_transform(args):
    if args.dataset == 'celeba':
        crop_size = 108
        re_size = 64
        # Center a crop_size x crop_size window in the 178 x 218 CelebA images.
        offset_height = (218 - crop_size) // 2
        offset_width = (178 - crop_size) // 2
        crop = lambda x: x[:, offset_height:offset_height + crop_size,
                           offset_width:offset_width + crop_size]
        preprocess = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(crop),
            transforms.ToPILImage(),
            # transforms.Scale was removed from torchvision; Resize is the
            # equivalent replacement.
            transforms.Resize(size=(re_size, re_size),
                              interpolation=Image.BICUBIC),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.5] * 3, std=[0.5] * 3)
        ])
        return preprocess
def _val_img_input_transform(self, args):
    if self.args.segnet == 'bisenet':
        # BGR-ordered ImageNet stats (images pass through FlipChannels first).
        mean_std = ([0.406, 0.456, 0.485], [0.225, 0.224, 0.229])
        img_transform = standard_transforms.Compose([
            FlipChannels(),
            standard_transforms.ToTensor(),
            standard_transforms.Normalize(*mean_std)
        ])
    elif self.args.segnet == 'swiftnet':
        mean_std = ([72.3, 82.90, 73.15], [47.73, 48.49, 47.67])
        img_transform = standard_transforms.Compose([
            FlipChannels(),
            standard_transforms.ToTensor(),
            standard_transforms.Lambda(lambda x: x.mul_(255)),  # back to [0, 255]
            standard_transforms.Normalize(*mean_std),
        ])
    else:
        raise ValueError('Unsupported segnet: {}'.format(self.args.segnet))
    return img_transform
def parse_augmentation(augmentation_spec, image_size):
    """Loads the data augmentation configuration.

    Args:
        augmentation_spec: DataAugmentation instance
        image_size: the output image size

    Returns:
        A list of torchvision transforms implementing the augmentation.
    """

    def gaussian_noise(x, std):
        """Adds Gaussian noise with the given standard deviation to `x`."""
        x += torch.randn(x.size()) * std
        return x

    def rescale(x):
        """Rescales the image between -1 and 1."""
        return (x * 2) - 1

    _transforms = []
    if augmentation_spec.enable_gaussian_noise and \
            augmentation_spec.gaussian_noise_std > 0:
        f = partial(gaussian_noise, std=augmentation_spec.gaussian_noise_std)
        _transforms.append(transforms.Lambda(f))

    if augmentation_spec.enable_jitter and \
            augmentation_spec.jitter_amount > 0:
        _transforms.append(transforms.ToPILImage())
        amount = augmentation_spec.jitter_amount
        _transforms.append(transforms.RandomCrop(image_size, padding=amount))

    return _transforms
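# Minimal sketch of calling parse_augmentation; the spec here is a stand-in
# namespace, not the project's real gin-configured DataAugmentation class.
from types import SimpleNamespace

import torch
from torchvision import transforms

spec = SimpleNamespace(enable_gaussian_noise=True, gaussian_noise_std=0.05,
                       enable_jitter=False, jitter_amount=0)
aug = transforms.Compose(parse_augmentation(spec, image_size=84))
noisy = aug(torch.zeros(3, 84, 84))  # zero image perturbed by N(0, 0.05^2)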
def inference(args, net):
    net.eval()
    data_set = DataSetArt(args.data_root,
                          img_transforms=trans,
                          target_transforms=transforms.Lambda(
                              lambda x: torch.from_numpy(x)),
                          mode='test')
    data_loader = DataLoader(data_set, batch_size=args.batch_size,
                             shuffle=False, drop_last=False,
                             num_workers=args.workers)
    class_id_list = []
    for sample in tqdm(data_loader):
        image = sample['image'].to(device)
        outputs = net(image)  # [B, N, C]
        _, class_ids = torch.max(outputs, dim=-1)
        class_id_list.append(class_ids.cpu().detach().numpy())
    class_id_np = np.concatenate(class_id_list, axis=0)
    class_id_pd = pd.DataFrame(class_id_np)
    class_id_pd.to_csv('rst.art.csv', header=False)
def sdo_dataset_normalize(channel: Union[str, int],
                          resize: Optional[int] = None):
    """Applies the normalization necessary for the sdo-dataset. Depending on
    the channel, it:
      - flips the image vertically
      - clips the "pixels" data to the predefined range (see above)
      - applies a log10() to the data
      - normalizes the data to the [0, 1] range
      - normalizes the data around 0 (standard scaling)

    :param channel: The kind of data to preprocess
    :param resize: Optional size (integer) to resize the image to
    :return: a transforms object to preprocess tensors
    """
    preprocess_config = CHANNEL_PREPROCESS[str(channel).lower()]

    lambda_transform = lambda x: torch.clamp(
        transforms_functional.vflip(x),
        min=preprocess_config["min"],
        max=preprocess_config["max"],
    )
    mean = preprocess_config["min"]
    std = preprocess_config["max"] - preprocess_config["min"]

    if preprocess_config["scaling"] == "log10":
        base_lambda = lambda_transform
        lambda_transform = lambda x: torch.log10(base_lambda(x))
        mean = math.log10(preprocess_config["min"])
        std = math.log10(preprocess_config["max"]) - math.log10(
            preprocess_config["min"])

    transform = [
        transforms.Lambda(lambda_transform),
        transforms.Normalize(mean=[mean], std=[std]),  # -> [0, 1]
        transforms.Normalize(mean=[0.5], std=[0.5]),   # -> [-1, 1]
    ]
    if resize is not None:
        transform.insert(0, transforms.Resize(resize))
    return transforms.Compose(transform)
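# Hedged usage sketch: assumes CHANNEL_PREPROCESS contains an entry such as
# {"171": {"min": 10, "max": 1000, "scaling": "log10"}} (values illustrative
# only). The composed transform then applies resize, vflip, clamp, log10,
# [0, 1] scaling, and a final shift to roughly [-1, 1].
tf = sdo_dataset_normalize("171", resize=256)
out = tf(torch.rand(1, 512, 512) * 1000)  # -> [1, 256, 256] tensor in ~[-1, 1]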
def _mnist(args: Namespace, binary: bool,
           crop: Union[None, int] = None) -> torch.Tensor:
    """Load the MNIST dataset.

    Args:
        args: The CLI arguments.
        binary: Whether to binarize the tensors.
        crop: The size of the image after a center crop. If None, the image
            is not cropped.

    Returns:
        The MNIST dataset as a Tensor fully loaded into memory, shaped
        according to batch size and maze size.
    """
    os.makedirs(os.path.join(ROOT, 'data', 'mnist'), exist_ok=True)

    transform = []
    if crop is not None:
        transform.append(transforms.CenterCrop(crop))
    transform.append(transforms.Resize(args.img_size))
    transform.append(transforms.ToTensor())
    if binary:
        transform.append(transforms.Lambda(lambda x: torch.round(x)))
    else:
        # MNIST is single-channel, so Normalize needs one mean/std value.
        transform.append(transforms.Normalize((0.5,), (0.5,)))

    data = datasets.MNIST(os.path.join(ROOT, 'data', 'mnist'), train=True,
                          download=True,
                          transform=transforms.Compose(transform))
    mnist_loader = torch.zeros(len(data), args.img_size,
                               args.img_size).type(TENSOR)
    for idx in range(len(data)):
        mnist_loader[idx], _ = data[idx]
    return mnist_loader.reshape(-1, args.batch_size, 1, args.img_size,
                                args.img_size).type(TENSOR)
def _data_loader(self, data, labels=None, batch_size=10,
                 shuffle=False, num_workers=0):
    """Returns a `torch.DataLoader` generated from the input CDataset.

    Parameters
    ----------
    data : CArray
        CArray containing the input data to load.
    labels : CArray
        CArray containing the labels for the data.
    batch_size : int, optional
        Size of the batches to load at each iteration of the data loader.
        Default value is 10.
    shuffle : bool, optional
        Whether to shuffle the data before dividing it into batches.
        Default value is False.
    num_workers : int, optional
        Number of additional processes to use for loading the data.
        Default value is 0.

    Returns
    -------
    `CDataLoaderPyTorch` iterator for loading the dataset in batches,
    optionally shuffled, with the specified number of workers.

    """
    transform = transforms.Lambda(lambda x: x.reshape(self._input_shape))
    return CDataLoaderPyTorch(data, labels, batch_size,
                              shuffle=shuffle,
                              transform=transform,
                              num_workers=num_workers).get_loader()
def create_transform(self):
    '''Defines the transformations applied to the images before they are
    returned. This can be extended as required.'''
    to_tensor = transforms.ToTensor()
    to_img = transforms.ToPILImage()
    # Additive Gaussian noise, clamped back to the valid [0, 1] range.
    self.apply_noise = transforms.Lambda(lambda x: to_img(
        torch.clamp(
            to_tensor(x) + self.noise_factor * torch.randn_like(to_tensor(x)),
            0.0, 1.0)))
    change_colour = transforms.ColorJitter(brightness=(0.5, 1.5),
                                           contrast=(0.5, 1.5),
                                           saturation=(0, 1.5),
                                           hue=0)
    transform_list = [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomApply([change_colour], p=0.9)
    ]
    self.augment_data = transforms.Compose(transform_list)

    if 'mean' in self.cfg:
        mean = [float(val) for val in self.cfg['mean'].split(',')]
    else:
        mean = [0.5, 0.5, 0.5]
    if 'std' in self.cfg:
        std = [float(val) for val in self.cfg['std'].split(',')]
    else:
        std = [0.5, 0.5, 0.5]
    self.transform_to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std)
    ])
def __init__(
        self,
        epoch,
        dataset_path='./drive/My Drive/datasets/car classification/train_dataset',
        val_path='./drive/My Drive/datasets/car classification/val_data',
        batch_size=128,
        model_name='tf_efficientnet_b0_ns',
        ckpt_path='./drive/My Drive/ckpt/190.pth',
        test_number=5000,
        pseudo_test=True,
        crop='five',
        csv_path='',
        mode='fix',
        sizes=(680, 600, 528)):
    self.epoch = epoch
    self.dataset_path = dataset_path
    self.val_path = val_path
    self.batch_size = batch_size
    self.model_name = model_name
    self.ckpt_path = ckpt_path
    self.test_number = test_number
    self.pseudo_test = pseudo_test
    self.crop = crop
    self.csv_path = csv_path
    self.mode = mode
    self.sizes = sizes

    if model_name == 'tf_efficientnet_b0_ns':
        self.input_size = (224, 224)
    elif model_name == 'tf_efficientnet_b3_ns':
        self.input_size = (300, 300)
    elif model_name == 'tf_efficientnet_b4_ns':
        self.input_size = (480, 480)
    elif model_name == 'tf_efficientnet_b6_ns':
        self.input_size = (680, 680)  # 528
    else:
        raise Exception('non-valid model name')

    # Compose transforms.
    transform = []

    def fill(img):
        # Upscale images smaller than the crop size by the smallest
        # power-of-two factor (computed from the height) that fits.
        if img.size[0] < self.input_size[0] or img.size[1] < self.input_size[1]:
            scale = 2 ** int(torch.ceil(torch.log2(
                torch.tensor(self.input_size[1] / img.size[1]))))
            return transforms.Resize(
                (img.size[1] * scale, img.size[0] * scale))(img)
        return img

    if crop == 'center':
        transform.append(transforms.CenterCrop(self.input_size[0]))
        transform.append(transforms.ToTensor())
        transform.append(
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]))
    elif crop == 'five':
        transform.append(transforms.Lambda(fill))
        transform.append(transforms.FiveCrop(self.input_size[0]))
        transform.append(transforms.Lambda(lambda crops: torch.stack(
            [transforms.ToTensor()(crop) for crop in crops])))
        transform.append(transforms.Lambda(lambda crops: torch.stack(
            [transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])(crop)
             for crop in crops])))
    self.transform = transforms.Compose(transform)

    if self.pseudo_test:
        if crop == 'multi':
            self.transform_val = []
            self.dataset = []
            self.dataloader = []
            for i in range(len(self.sizes)):
                self.transform_val.append(
                    self.get_transform_val((self.sizes[i], self.sizes[i])))
                self.dataset.append(
                    ImageFolder(self.dataset_path,
                                transform=self.transform_val[i]))
                self.dataloader.append(
                    DataLoader(self.dataset[i], batch_size=self.batch_size,
                               num_workers=1, shuffle=False))
        else:
            # `transform_val` was never assigned on this path in the original;
            # build it for the model's input size before using it.
            self.transform_val = self.get_transform_val(self.input_size)
            self.dataset = ImageFolder(self.dataset_path,
                                       transform=self.transform_val)
            self.dataloader = DataLoader(self.dataset,
                                         batch_size=self.batch_size,
                                         num_workers=1, shuffle=False)

    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.model = create_model(model_name, num_classes=196).to(self.device)
    ckpt = torch.load(self.ckpt_path)
    if self.mode == 'fix':
        self.model.load_state_dict(ckpt['model'])
    else:
        self.model.load_state_dict(ckpt['model_state_dict'])
        self.start_epoch = 0

    labels = [d.name for d in os.scandir(self.val_path) if d.is_dir()]
    labels.sort()
    labels[labels.index('Ram CV Cargo Van Minivan 2012')] = \
        'Ram C/V Cargo Van Minivan 2012'
    self.label_texts = labels
def train(args):
    torch.backends.cudnn.benchmark = True
    data_set = DataSetArt(args.data_root,
                          img_transforms=transforms.Compose([
                              transforms.RandomResizedCrop(224),
                              transforms.RandomHorizontalFlip(),
                              transforms.ColorJitter(brightness=0.4,
                                                     saturation=0.4,
                                                     hue=0.4),
                              transforms.ToTensor(),
                              transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                   std=[0.229, 0.224, 0.225])
                          ]),
                          target_transforms=transforms.Lambda(
                              lambda x: torch.from_numpy(x)))
    train_sampler = torch.utils.data.RandomSampler(data_set)
    data_loader = DataLoader(data_set, batch_size=args.batch_size,
                             sampler=train_sampler, num_workers=args.workers)
    # Network (this assignment was commented out in the original, leaving
    # `net` undefined below).
    net = ResNetModel(num_classes=49)
    params = filter(lambda p: p.requires_grad, net.parameters())
    print(net)
    net.train()
    net.to(device)

    optimizer = optim.Adadelta(params, weight_decay=args.weight_decay)

    # Load a pretrained checkpoint when resuming.
    if args.init_epoch > 0:
        checkpoint = torch.load(
            os.path.join(args.output_dir,
                         'art.{:03d}.pth'.format(args.init_epoch)),
            map_location='cpu')
        optimizer.load_state_dict(checkpoint['optimizer'])
        # lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        net.load_state_dict(checkpoint['model'])

    # Training loop.
    for epoch in range(args.init_epoch, args.epochs):
        epoch_loss = 0
        accuracy_num = 0
        for sample in tqdm(data_loader):
            image = sample['image'].to(device)
            target = sample['target'].to(device)
            outputs = net(image)  # [B, N, C]
            loss = F.cross_entropy(outputs, target)
            # Gradient update.
            net.zero_grad()
            loss.backward()
            optimizer.step()
            # Accumulate the loss for the current epoch.
            epoch_loss += loss.item() * image.size(0)
            # Accumulate the accuracy.
            _, class_ids = torch.max(outputs, dim=-1)
            accuracy_num += np.sum(
                class_ids.cpu().detach().numpy() == sample['target'].numpy())

        epoch_loss = epoch_loss / len(data_loader.dataset)
        acc = accuracy_num / len(data_loader.dataset)
        # Log progress.
        print('Epoch: {}/{} loss: {:03f} acc: {:.3f}'.format(
            epoch + 1, args.epochs, epoch_loss, acc))
        # Save a checkpoint.
        if args.output_dir:
            checkpoint = {
                'model': net.state_dict(),
                'optimizer': optimizer.state_dict(),
                # 'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch + 1,
                'args': args
            }
            torch.save(checkpoint,
                       os.path.join(args.output_dir,
                                    'art.{:03d}.pth'.format(epoch + 1)))
    return net