class ViTVisualBackbone(VisualBackbone):
    """Visual backbone wrapping a (optionally pretrained) Vision Transformer.

    The ViT's final ``norm`` and classification head ``fc`` are removed;
    ``forward`` returns a spatial grid of patch features ``(B, D, P, P)``.
    """

    def __init__(
        self,
        name: str = 'B_32',
        visual_feature_size: int = 768,
        pretrained: bool = True,
        frozen: bool = False,
        image_size: int = 224,
    ):
        """
        :param name: ViT variant name (e.g. ``'B_32'``).
        :param visual_feature_size: channel dimension of the output features.
        :param pretrained: load pretrained weights when True.
        :param frozen: freeze all backbone parameters when True.
        :param image_size: side length of the square input image.
        """
        super().__init__(visual_feature_size)
        self.cnn = ViT(name, pretrained=pretrained,
                       image_size=(image_size, image_size))
        # Only patch features are needed — drop the classification layers.
        del self.cnn.norm
        del self.cnn.fc

        # The positional embedding covers num_patches + 1 (class token), so
        # the patch-grid side is sqrt(seq_len - 1). The original rounded
        # sqrt(seq_len), which yields the same integer only by accident;
        # derive it from the actual patch count instead.
        num_patches = self.cnn.positional_embedding.pos_embedding.shape[1] - 1
        self.patch_size = round(math.sqrt(num_patches))

        if frozen:
            for param in self.cnn.parameters():
                param.requires_grad = False
            # NOTE(review): eval() set here is undone if a caller later puts
            # the parent module in train() mode — confirm that is acceptable.
            self.cnn.eval()

    def forward(self, image: torch.Tensor):
        """Return patch features of shape ``(B, D, P, P)`` for ``image``."""
        out = self.cnn(image)[:, 1:]  # drop the leading class token
        out = out.permute(0, 2, 1).contiguous()  # (B, L, D) -> (B, D, L)
        B, D, _ = out.shape
        out = out.view(B, D, self.patch_size, self.patch_size)
        return out
def __init__(self, num_classes, args):
    """Build the wrapped ViT model from parsed command-line ``args``."""
    super(VisionTransformer, self).__init__()
    self.num_classes = num_classes
    vit_kwargs = dict(
        name=args.model_type,
        pretrained=args.pretrained,
        num_classes=self.num_classes,
        image_size=args.image_size,
    )
    # Mere presence of the attribute (not its value) enables visualization.
    if hasattr(args, 'vis_attention'):
        vit_kwargs['visualize'] = True
    self.model = ViT(**vit_kwargs)
def __init__(self):
    """Frozen pretrained ViT-B/16 backbone with a fresh 10-class linear head."""
    super(Model, self).__init__()
    # Pretrained ViT-B/16; the dangling `model =` alias from the original
    # served no purpose and is removed.
    self.backbone = ViT('B_16_imagenet1k', pretrained=True).to(device)
    # Freeze every backbone parameter; names were unused, so iterate
    # parameters() directly.
    for param in self.backbone.parameters():
        param.requires_grad = False
    # Replace the classifier AFTER freezing: the freshly created Linear has
    # requires_grad=True, so only this head trains. 768 is the ViT-B feature
    # width.
    self.backbone.fc = nn.Linear(768, 10).to(device)
def _make(cls) -> modelViT:
    """Construct the ViT model described by the class configuration.

    Downloads weights from the ViT library into local file storage.
    :return: ViT model
    """
    return ViT(name=cls._type, pretrained=True, image_size=cls._image_size)
def __init__(
    self,
    name: str = 'B_32',
    visual_feature_size: int = 768,
    pretrained: bool = True,
    frozen: bool = False,
    image_size: int = 224,
):
    """Wrap a ViT as a spatial-feature backbone.

    The classification layers (``norm`` and ``fc``) are deleted, and the
    patch-grid side length is derived from the positional embedding.

    :param name: ViT variant name (e.g. ``'B_32'``).
    :param visual_feature_size: channel dimension of the output features.
    :param pretrained: load pretrained weights when True.
    :param frozen: freeze all backbone parameters when True.
    :param image_size: side length of the square input image.
    """
    super().__init__(visual_feature_size)

    backbone = ViT(name, pretrained=pretrained,
                   image_size=(image_size, image_size))
    # Only patch features are wanted — strip the classification layers.
    del backbone.norm
    del backbone.fc
    self.cnn = backbone

    # Positional-embedding sequence length rounds (via sqrt) to the side of
    # the patch grid.
    seq_len = self.cnn.positional_embedding.pos_embedding.shape[1]
    self.patch_size = round(math.sqrt(seq_len))

    if frozen:
        for p in self.cnn.parameters():
            p.requires_grad = False
        # Keep the frozen backbone in eval mode.
        self.cnn.eval()
def get_model(name):
    """Return a ``PytorchWrapper`` around a pretrained ViT.

    ``name`` must be one of the known ``ViT_*`` identifiers; the ``ViT_``
    prefix is stripped before constructing the model.
    """
    known_names = [
        'ViT_B_16_imagenet1k', 'ViT_B_32_imagenet1k',
        'ViT_L_16_imagenet1k', 'ViT_L_32_imagenet1k',
        'ViT_B_16', 'ViT_B_32', 'ViT_L_32',
    ]
    assert name in known_names
    name = name[4:]  # drop the 'ViT_' prefix
    model = ViT(name, pretrained=True)
    side = model.image_size[0]
    preprocessing = functools.partial(load_preprocess_images, image_size=side)
    wrapper = PytorchWrapper(identifier=name, model=model,
                             preprocessing=preprocessing)
    wrapper.image_size = side
    return wrapper
def model_vit_sizes(
    image_size: int = MODEL_INPUT,
    name: str = MODEL_VIT_TYPE,
    pretrained: bool = True,
):
    """
    Print a ViT model's parameter count.

    :param image_size: input image size
    :param name: one of ViT model names
    :param pretrained: flag for pretrained model
    :return: None — results are printed
    """
    model = ViT(
        name=name,
        image_size=image_size,
        pretrained=pretrained,
    )
    # Generator expression instead of a throwaway list inside sum().
    n_params = sum(param.nelement() for param in model.parameters())
    print(f'ViT model {name} with input size {image_size}')
    print(f'Model parameters: {n_params}')
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

from pytorch_pretrained_vit import ViT

model_path = './p1_model.pth'
num_classes = 37

# Decide which device we want to run on
device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")

model = ViT('B_16_imagenet1k', pretrained=True, num_classes=num_classes)
model = model.to(device)
model.load_state_dict(torch.load(model_path, map_location=device))

# Positional embedding: size = (1, 577, 768) — one class token + 24*24 patches.
s = model.positional_embedding.pos_embedding.shape
# Remove the class token (index 0) to keep only patch embeddings.
# BUG FIX: the original sliced [:-1], which dropped the LAST patch and kept
# the class token, contradicting its own "remove first patch" comment and the
# [0, 1:] slice used below.
# size = (576, 768)
pos_patch = model.positional_embedding.pos_embedding.view(*s[1:])[1:].view(
    24 * 24, -1)

# Visualize position embedding similarities.
# One cell shows cos similarity between an embedding and all the other embeddings.
pos_embed = model.positional_embedding.pos_embedding[0, 1:]  # size=(576, 768)
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
fig, axes = plt.subplots(figsize=(12, 10), nrows=24, ncols=24)
# save_model(train_model(epochs, transformer, "Transformer", imagenette_data, batch_size, model_dir), "Transformer", model_dir) # transformer = load_model(f"E:/Git/SKP_Transformer/models/trained_models/Transformer.pt") # print(f"Training time for {epochs} epochs : {time.time() - start_time}") # print_accuracy_per_class(transformer, classes, batch_size, imagenette_data.test_loader) # print_accuracy(transformer, classes, batch_size, imagenette_data.test_loader) # ViTPretrained = ViT('B_16_imagenet1k', pretrained=True, num_classes=10) #, patches=4, num_classes=10, dim=64) # print(f"Parameters {count_model_parameters(ViTPretrained, False)}") # start_time = time.time() # save_model(train_model(epochs, ViTPretrained, "ViTPretrained", imagenette_data, batch_size, model_dir), "ViTPretrained", model_dir) # ViTPretrained = load_model(f"E:/Git/SKP_Transformer/models/trained_models/ViTPretrained.pt") # print(f"Training time for {epochs} epochs : {time.time() - start_time}") # print_accuracy_per_class(ViTPretrained, classes, batch_size, imagenette_data.test_loader) # print_accuracy(ViTPretrained, classes, batch_size, imagenette_data.test_loader) ViTFromScratch = ViT('B_16_imagenet1k', pretrained=False, num_classes=10) #, patches=4 dim=64) print(f"Parameters {count_model_parameters(ViTFromScratch, False)}") start_time = time.time() save_model( train_model(epochs, ViTFromScratch, "ViTFromScratch", imagenette_data, batch_size, model_dir), "ViTFromScratch", model_dir) ViTFromScratch = load_model( f"E:/Git/SKP_Transformer/models/trained_models/ViTFromScratch.pt") print(f"Training time for {epochs} epochs : {time.time() - start_time}") print_accuracy_per_class(ViTFromScratch, classes, batch_size, imagenette_data.test_loader) print_accuracy(ViTFromScratch, classes, batch_size, imagenette_data.test_loader) # NA TRANSFORMER 100 epochs # model_name = "na_transformer"
class MegaSizer(torch.nn.Module):
    """Wrap a model, bicubic-resizing its inputs to 224 px before forwarding."""

    def __init__(self, wrap):
        super().__init__()
        # wrap: the inner model that receives the resized input.
        self.wrap = wrap
        # Upscale whatever comes in (e.g. 28x28 MNIST) to the ViT input size.
        self.sizer = transforms.Resize(224, interpolation=InterpolationMode.BICUBIC)

    def forward(self, x):
        # Resize first, then delegate to the wrapped model.
        x = self.sizer(x)
        return self.wrap(x)


if __name__ == '__main__':
    dev = 'cuda:1'
    # NOTE(review): loaded but never used in the visible code — confirm intent.
    mnist_models = torch.load("mnist.pt")
    # Pretrained ViT-B/16 behind the resizer so MNIST digits fit its input.
    model = MegaSizer(ViT('B_16', pretrained=True)).to(dev)
    trs = transforms.Compose([
        transforms.ToTensor(),
        # Replicate the single grayscale channel to 3 channels for the ViT.
        transforms.Lambda(lambda x: torch.cat([x] * 3))
    ])
    ds = MNIST('./data', train=True, transform=trs, download=True)
    dl = DataLoader(ds, 32, True, num_workers=8)
    opt = torch.optim.Adam(model.parameters(), eps=1e-3)
    for epoch in range(20):
        prog = tqdm.tqdm(dl)
        for x, y in prog:
            y_cpu = y.detach().cpu().numpy()
            x, y = x.to(dev), y.to(dev)
            opt.zero_grad()
            logit = model(x)
            # (training loop continues beyond this excerpt)
valid_dataset = p1_data(valid_dir, mode='valid', transform=test_tfm) # Create the dataloader train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=workers) valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=workers) # Decide which device we want to run on device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu") model = ViT('B_16_imagenet1k', pretrained=True, num_classes=num_classes) model = model.to(device) print(model) # Initialize Loss function criterion = nn.CrossEntropyLoss() optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay) # Training Loop train_losses = [] for epoch in range(num_epochs): # ---------- Training ---------- # Make sure the model is in train mode before training.
def main_worker(gpu, ngpus_per_node, args):
    """Per-process training entry point (one worker per GPU).

    Builds the model (ViT or a torchvision architecture), wraps it for the
    selected parallelism mode, optionally resumes from a checkpoint, builds
    the ImageNet-style dataloaders, then runs the train/validate epoch loop.

    :param gpu: GPU index for this worker (or None for CPU/DataParallel).
    :param ngpus_per_node: number of GPUs on this node.
    :param args: parsed command-line arguments.
    """
    global best_acc1
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))
    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)
    # NEW: select a ViT when --vit is passed, else a torchvision model by name.
    if args.vit:
        model = ViT(args.arch, pretrained=args.pretrained)
        # # NOTE: This is for debugging
        # model = ViT('B_16_imagenet1k', pretrained=False)
        # load_pretrained_weights(model, weights_path='/home/luke/projects/experiments/ViT-PyTorch/jax_to_pytorch/weights/B_16_imagenet1k.pth')
    else:
        model = models.__dict__[args.arch](pretrained=args.pretrained)
    print("=> using model '{}' (pretrained={})".format(args.arch,
                                                       args.pretrained))
    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
    #                                  std=[0.229, 0.224, 0.225])
    # NOTE(review): the ImageNet normalization above is commented out in favor
    # of a plain 0.5/0.5 normalization — presumably to match the pretrained
    # ViT's preprocessing; confirm against the model's training recipe.
    normalize = transforms.Normalize(0.5, 0.5)
    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(args.image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))
    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)
    val_transforms = transforms.Compose([
        transforms.Resize(args.image_size, interpolation=PIL.Image.BICUBIC),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        normalize,
    ])
    print('Using image size', args.image_size)
    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir, val_transforms),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)
    if args.evaluate:
        # Evaluate-only mode: validate once, dump the result, and exit.
        res = validate(val_loader, model, criterion, args)
        with open('res.txt', 'w') as f:
            print(res, file=f)
        return
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)
        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)
        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)
        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)
        # Only rank 0 (per node) writes checkpoints in distributed mode.
        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)