Code example #1
File: visual_backbones.py Project: wilson1yan/virtex
class ViTVisualBackbone(VisualBackbone):
    def __init__(
        self,
        name: str = 'B_32',
        visual_feature_size: int = 768,
        pretrained: bool = True,
        frozen: bool = False,
        image_size: int = 224,
    ):
        super().__init__(visual_feature_size)

        self.cnn = ViT(name,
                       pretrained=pretrained,
                       image_size=(image_size, image_size))
        # Drop the final LayerNorm and the classification head so that
        # forward() returns the full token sequence instead of class logits.
        del self.cnn.norm
        del self.cnn.fc

        # The positional embedding covers the class token plus all patch
        # tokens, so the patch-grid side length is approximately the square
        # root of its length (e.g. round(sqrt(50)) == 7 for B_32 at 224).
        self.patch_size = round(
            math.sqrt(self.cnn.positional_embedding.pos_embedding.shape[1]))

        if frozen:
            for param in self.cnn.parameters():
                param.requires_grad = False
            self.cnn.eval()

    def forward(self, image: torch.Tensor):
        out = self.cnn(image)[:, 1:]  # remove cls token
        out = out.permute(0, 2, 1).contiguous()  # NLD -> NDL
        B, D, _ = out.shape
        out = out.view(B, D, self.patch_size, self.patch_size)
        return out
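A quick shape check for the backbone above (a minimal sketch, assuming pytorch_pretrained_vit is installed and ViTVisualBackbone is importable as defined):

import torch

backbone = ViTVisualBackbone(name='B_32', image_size=224, pretrained=False)
images = torch.randn(2, 3, 224, 224)
# B_32 at 224x224 gives (224 / 32) ** 2 = 49 patch tokens, so the spatial
# grid is 7x7 and the output shape is (2, 768, 7, 7).
print(backbone(images).shape)  # torch.Size([2, 768, 7, 7])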
Code example #2
File: models.py Project: Mirai-Gadget-Lab/animesion
    def __init__(self, num_classes, args):
        super(VisionTransformer, self).__init__()
        self.num_classes = num_classes

        # Pass visualize=True only when attention visualization is requested;
        # otherwise call ViT with its default arguments.
        kwargs = dict(name=args.model_type,
                      pretrained=args.pretrained,
                      num_classes=self.num_classes,
                      image_size=args.image_size)
        if hasattr(args, 'vis_attention'):
            kwargs['visualize'] = True
        base_model = ViT(**kwargs)

        self.model = base_model
Code example #3
File: learner.py Project: praveen-14/one-shot
	def __init__(self):
		super(Model, self).__init__()
		# self.backbone = models.resnet18(pretrained=True).to(device)
		self.backbone = ViT('B_16_imagenet1k', pretrained=True).to(device)
		# Freeze every backbone parameter; only the new head below trains.
		for name, param in self.backbone.named_parameters():
			param.requires_grad = False
			# print(name, param.shape)

		# Replace the classification head; the ViT-B hidden size is 768, and
		# a freshly constructed nn.Linear has requires_grad=True by default.
		# self.backbone.fc = nn.Linear(512, 10).to(device)
		self.backbone.fc = nn.Linear(768, 10).to(device)
Code example #4
    @classmethod
    def _make(cls) -> ViT:
        """
        Download model from ViT library to local file storage
        :return: ViT model
        """

        return ViT(
            name=cls._type,
            image_size=cls._image_size,
            pretrained=True,
        )
Code example #5
import functools

# Assumed imports: the wrapper helpers below match the brain-score
# model-tools API, which this snippet appears to use.
from model_tools.activations.pytorch import PytorchWrapper, load_preprocess_images
from pytorch_pretrained_vit import ViT


def get_model(name):
    assert name in [
        'ViT_B_16_imagenet1k', 'ViT_B_32_imagenet1k', 'ViT_L_16_imagenet1k',
        'ViT_L_32_imagenet1k', 'ViT_B_16', 'ViT_B_32', 'ViT_L_32'
    ]
    name = name[4:]  # strip the leading 'ViT_' prefix to match pytorch_pretrained_vit names
    model = ViT(name, pretrained=True)
    preprocessing = functools.partial(load_preprocess_images,
                                      image_size=model.image_size[0])
    wrapper = PytorchWrapper(identifier=name,
                             model=model,
                             preprocessing=preprocessing)
    wrapper.image_size = model.image_size[0]
    return wrapper
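A hedged usage sketch, assuming the brain-score model-tools helpers imported above are installed:

wrapper = get_model('ViT_B_16_imagenet1k')
print(wrapper.identifier, wrapper.image_size)  # expected: B_16_imagenet1k 384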
Code example #6
def model_vit_sizes(
    image_size: int = MODEL_INPUT,
    name: str = MODEL_VIT_TYPE,
    pretrained: bool = True,
):
    """
    Print model parameters
    :param name: one of the ViT model names
    :param image_size: input image size
    :param pretrained: flag for pretrained model
    :return:
    """
    model = ViT(
        name=name,
        image_size=image_size,
        pretrained=pretrained,
    )

    print(f'ViT model {name} with input size {image_size}')
    print(
        f'Model parameters: {sum([param.nelement() for param in model.parameters()])}'
    )
Code example #7
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from pytorch_pretrained_vit import ViT

model_path = './p1_model.pth'
num_classes = 37

# Decide which device we want to run on
device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")

model = ViT('B_16_imagenet1k', pretrained=True, num_classes=num_classes)
model = model.to(device)
model.load_state_dict(torch.load(model_path, map_location=device))

# size = (1, 577, 768): class token + 24*24 patch position embeddings
s = model.positional_embedding.pos_embedding.shape

# drop the class-token embedding, keeping the 576 patch embeddings
# size = (576, 768)
pos_patch = model.positional_embedding.pos_embedding.view(*s[1:])[1:].view(
    24 * 24, -1)

# Visualize position embedding similarities.
# One cell shows cos similarity between an embedding and all the other embeddings.
pos_embed = model.positional_embedding.pos_embedding[0, 1:]  #size=(576, 768)
cos = torch.nn.CosineSimilarity(dim=1, eps=1e-6)
fig, axes = plt.subplots(figsize=(12, 10), nrows=24, ncols=24)
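The snippet is truncated before the drawing loop; a plausible completion (an assumption, not the original author's code) that fills each of the 24x24 panels with one similarity map:

for i in range(24 * 24):
    # Similarity between patch i's embedding and all 576 patch embeddings,
    # reshaped back onto the 24x24 patch grid.
    sim = cos(pos_embed[i].unsqueeze(0), pos_embed).view(24, 24)
    ax = axes[i // 24][i % 24]
    ax.imshow(sim.detach().cpu().numpy())
    ax.axis('off')
plt.show()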
Code example #8
File: main.py Project: tortueTortue/SKP_Transformer
    # save_model(train_model(epochs, transformer, "Transformer", imagenette_data, batch_size, model_dir), "Transformer", model_dir)
    # transformer = load_model(f"E:/Git/SKP_Transformer/models/trained_models/Transformer.pt")
    # print(f"Training time for {epochs} epochs : {time.time() - start_time}")
    # print_accuracy_per_class(transformer, classes, batch_size, imagenette_data.test_loader)
    # print_accuracy(transformer, classes, batch_size, imagenette_data.test_loader)

    # ViTPretrained = ViT('B_16_imagenet1k', pretrained=True, num_classes=10) #, patches=4, num_classes=10, dim=64)
    # print(f"Parameters {count_model_parameters(ViTPretrained, False)}")
    # start_time = time.time()
    # save_model(train_model(epochs, ViTPretrained, "ViTPretrained", imagenette_data, batch_size, model_dir), "ViTPretrained", model_dir)
    # ViTPretrained = load_model(f"E:/Git/SKP_Transformer/models/trained_models/ViTPretrained.pt")
    # print(f"Training time for {epochs} epochs : {time.time() - start_time}")
    # print_accuracy_per_class(ViTPretrained, classes, batch_size, imagenette_data.test_loader)
    # print_accuracy(ViTPretrained, classes, batch_size, imagenette_data.test_loader)

    ViTFromScratch = ViT('B_16_imagenet1k', pretrained=False,
                         num_classes=10)  #, patches=4, dim=64)
    print(f"Parameters {count_model_parameters(ViTFromScratch, False)}")
    start_time = time.time()
    save_model(
        train_model(epochs, ViTFromScratch, "ViTFromScratch", imagenette_data,
                    batch_size, model_dir), "ViTFromScratch", model_dir)
    ViTFromScratch = load_model(
        "E:/Git/SKP_Transformer/models/trained_models/ViTFromScratch.pt")
    print(f"Training time for {epochs} epochs : {time.time() - start_time}")
    print_accuracy_per_class(ViTFromScratch, classes, batch_size,
                             imagenette_data.test_loader)
    print_accuracy(ViTFromScratch, classes, batch_size,
                   imagenette_data.test_loader)

    # NA TRANSFORMER 100 epochs
    # model_name = "na_transformer"
Code example #9
import torch
import torch.nn.functional as F
import tqdm
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
from torchvision.transforms import InterpolationMode
from pytorch_pretrained_vit import ViT


class MegaSizer(torch.nn.Module):
    def __init__(self, wrap):
        super().__init__()
        self.wrap = wrap
        self.sizer = transforms.Resize(224,
                                       interpolation=InterpolationMode.BICUBIC)

    def forward(self, x):
        x = self.sizer(x)
        return self.wrap(x)


if __name__ == '__main__':
    dev = 'cuda:1'
    mnist_models = torch.load("mnist.pt")
    model = MegaSizer(ViT('B_16', pretrained=True)).to(dev)

    trs = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: torch.cat([x] * 3))
    ])
    ds = MNIST('./data', train=True, transform=trs, download=True)
    dl = DataLoader(ds, 32, True, num_workers=8)
    opt = torch.optim.Adam(model.parameters(), eps=1e-3)
    for epoch in range(20):
        prog = tqdm.tqdm(dl)
        for x, y in prog:
            y_cpu = y.detach().cpu().numpy()
            x, y = x.to(dev), y.to(dev)
            opt.zero_grad()
            logit = model(x)
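The loop is cut off here; a plausible continuation of the training step (an assumption, not part of the original file):

            loss = F.cross_entropy(logit, y)
            loss.backward()
            opt.step()
            prog.set_description(f'epoch {epoch} loss {loss.item():.4f}')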
Code example #10
File: p1_train.py Project: yiwei32/NTU_courses
valid_dataset = p1_data(valid_dir, mode='valid', transform=test_tfm)

# Create the dataloader
train_loader = DataLoader(train_dataset,
                          batch_size=batch_size,
                          shuffle=True,
                          num_workers=workers)
valid_loader = DataLoader(valid_dataset,
                          batch_size=batch_size,
                          shuffle=False,
                          num_workers=workers)

# Decide which device we want to run on
device = torch.device("cuda" if (torch.cuda.is_available()) else "cpu")

model = ViT('B_16_imagenet1k', pretrained=True, num_classes=num_classes)
model = model.to(device)
print(model)

# Initialize Loss function
criterion = nn.CrossEntropyLoss()

optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

# Training Loop
train_losses = []

for epoch in range(num_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
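The excerpt stops mid-loop; a typical continuation (an assumption, consistent with the variables defined above):

    model.train()
    train_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
    train_losses.append(train_loss / len(train_loader))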
Code example #11
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    # NEW
    if args.vit:
        model = ViT(args.arch, pretrained=args.pretrained)

        # # NOTE: This is for debugging
        # model = ViT('B_16_imagenet1k', pretrained=False)
        # load_pretrained_weights(model, weights_path='/home/luke/projects/experiments/ViT-PyTorch/jax_to_pytorch/weights/B_16_imagenet1k.pth')

    else:
        model = models.__dict__[args.arch](pretrained=args.pretrained)
    print("=> using model '{}' (pretrained={})".format(args.arch,
                                                       args.pretrained))

    if args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(args.workers / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    # ViT pretrained weights expect inputs normalized to [-1, 1], hence
    # Normalize(0.5, 0.5) instead of the usual ImageNet statistics below.
    # normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    normalize = transforms.Normalize(0.5, 0.5)

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(args.image_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=args.batch_size,
                                               shuffle=(train_sampler is None),
                                               num_workers=args.workers,
                                               pin_memory=True,
                                               sampler=train_sampler)

    val_transforms = transforms.Compose([
        transforms.Resize(args.image_size, interpolation=PIL.Image.BICUBIC),
        transforms.CenterCrop(args.image_size),
        transforms.ToTensor(),
        normalize,
    ])
    print('Using image size', args.image_size)

    val_loader = torch.utils.data.DataLoader(datasets.ImageFolder(
        valdir, val_transforms),
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    if args.evaluate:
        res = validate(val_loader, model, criterion, args)
        with open('res.txt', 'w') as f:
            print(res, file=f)
        return

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        adjust_learning_rate(optimizer, epoch, args)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch, args)

        # evaluate on validation set
        acc1 = validate(val_loader, model, criterion, args)

        # remember best acc@1 and save checkpoint
        is_best = acc1 > best_acc1
        best_acc1 = max(acc1, best_acc1)

        if not args.multiprocessing_distributed or (
                args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer': optimizer.state_dict(),
                }, is_best)