Exemple #1
0
 def __init__(self, **kwargs):
     """Build the ViT classifier head.

     Keyword Args:
         contains_unknown (bool): when True the classifier gets a third
             "unknown" class (3 outputs instead of 2).

     Raises:
         KeyError: if ``contains_unknown`` is not supplied.
     """
     super().__init__()
     self.contains_unknown = kwargs["contains_unknown"]

     # The original duplicated the whole ViT construction in both branches;
     # the two copies differed only in num_classes, so build it once.
     self.classifier = ViT(
         image_size = 300,
         patch_size = 1,
         num_classes = 3 if self.contains_unknown else 2,
         dim = 128,
         depth = 12,
         heads = 8,
         channels = 1,
         mlp_dim = 256,
         dropout = 0.1,
         emb_dropout = 0.1
     )
Exemple #2
0
    class VisualTrans(nn.Module):
        """Modified ViT over 1-D (1 x 962) inputs; weights are restored
        from ``file_path`` at construction time (best effort)."""

        def __init__(self, file_path):
            super(VisualTrans, self).__init__()

            self.file_path = file_path

            self.model = ViT_modified(
                n_classes=1,
                image_size=(1,
                            962),  # image size is a tuple of (height, width)
                patch_size=(1, 13),  # patch size is a tuple of (height, width)
                dim=16,
                depth=3,
                heads=16,
                mlp_dim=512,
                dropout=0.1,
                emb_dropout=0.1)

            state_dict = torch.load(self.file_path, map_location='cpu')

            # Best-effort load: key/shape mismatches are reported, not fatal.
            # (Removed an unused `new_state_dict = OrderedDict()` local.)
            try:
                self.model.load_state_dict(state_dict)
            except RuntimeError as e:
                # NOTE(review): the message mentions "test_dataset_size" but
                # prints the load error -- looks like a copy/paste slip;
                # confirm the intended wording before changing it.
                print('Ignoring test_dataset_size "' + str(e) + '"')

        def forward(self, inpt):
            """Concatenate theta and padded x along the width axis and run
            the model; returns the first element of the model output.

            NOTE(review): unsqueeze_ is in-place, so the caller's tensors
            are mutated -- confirm that is intended.
            """
            theta, x = inpt
            theta = theta.unsqueeze_(1).unsqueeze_(1)
            x = x.unsqueeze_(1).unsqueeze_(1)
            x = torch.nn.functional.pad(x, (0, 2))  # pad last dim by 2 on the right
            inp = torch.cat((theta, x), 3)

            out = self.model(inp)[0]  #another [0]- when the n=2
            return out
Exemple #3
0
    def __init__(self, face_recognition_cnn_path=None):
        """2D encoder: a truncated face-recognition ResNet followed by a ViT.

        Args:
            face_recognition_cnn_path: optional checkpoint path. When given,
                the checkpoint (saved from a DataParallel-wrapped model) is
                loaded before the ResNet is truncated.
        """
        super(Encoder2DViT, self).__init__()

        face_cnn = FaceRecognitionCNN()

        # The original tested the path twice with different semantics
        # (`is not None` vs truthiness): an empty-string path would have
        # loaded through DataParallel and then read `face_cnn.resnet`,
        # which does not exist on the wrapper. Use one consistent branch.
        if face_recognition_cnn_path is not None:
            # Checkpoint keys are 'module.'-prefixed -> wrap before loading.
            face_cnn = nn.DataParallel(face_cnn)
            state_dict = torch.load(face_recognition_cnn_path,
                                    map_location='cpu')
            face_cnn.load_state_dict(state_dict)
            resnet = face_cnn.module.resnet
        else:
            resnet = face_cnn.resnet

        # Drop the last 12 children of the ResNet; keep the rest as encoder.
        modules = list(resnet.children())[:-12]
        self.encoder2d = nn.Sequential(*modules)

        del face_cnn  # only the truncated copy is kept

        self.vit = ViT(image_size=IMG_SIZE,
                       patch_size=PATCH_SIZE,
                       num_classes=5,
                       dim=DIM,
                       depth=DEPTH,
                       heads=HEADS,
                       mlp_dim=MLP_DIM,
                       dropout=0.1,
                       emb_dropout=0.1,
                       channels=CHANNELS)
Exemple #4
0
    def __init__(self):
        """Convolutional encoder (three conv/pool/LeakyReLU stages) whose
        output feeds two ViTs (128x128 and 16x16 inputs).

        An earlier revision's conv4..conv6/fc1 layers sat inside an unused
        string literal (never registered as modules); that dead code has
        been removed.
        """
        super(Encoder, self).__init__()

        # 3 -> 96 -> 128 -> 256 channels; each MaxPool halves spatial size.
        self.conv1 = nn.Conv2d(3, 96, kernel_size=3, stride=1, padding=1)
        self.pool1 = nn.MaxPool2d(2)
        self.relu1 = nn.LeakyReLU()
        self.conv2 = nn.Conv2d(96, 128, kernel_size=3, stride=1, padding=1)
        self.pool2 = nn.MaxPool2d(2)
        self.relu2 = nn.LeakyReLU()
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.pool3 = nn.MaxPool2d(2)
        self.relu3 = nn.LeakyReLU()

        # Project back to 3 channels before the ViTs.
        self.conv8 = nn.Conv2d(256, 3, kernel_size=3, stride=1, padding=1)
        self.v1 = ViT(image_size=128,
                      patch_size=32,
                      num_classes=768,
                      dim=1024,
                      depth=6,
                      heads=16,
                      mlp_dim=2048,
                      dropout=0.1,
                      emb_dropout=0.1)

        self.v2 = ViT(image_size=16,
                      patch_size=4,
                      num_classes=64,
                      dim=1024,
                      depth=6,
                      heads=16,
                      mlp_dim=2048,
                      dropout=0.1,
                      emb_dropout=0.1)
Exemple #5
0
 def __init__(self):
     """Single-block patch transformer (16x16 image, 16x16 patches)
     followed by a linear head mapping the 256-dim output to a flat
     3x32x32 image."""
     super(Model, self).__init__()
     self.transformer = ViT(
         image_size=16,
         patch_size=16,
         num_classes=16**2,
         dim=256,
         depth=1,
         heads=1,
         mlp_dim=256,
     )
     self.head = torch.nn.Linear(256, 3 * 32 * 32)
 def __init__(self, num_classes):
     """ViT classifier for 224x224 RGB images.

     Args:
         num_classes: number of output classes.
     """
     super(vit, self).__init__()
     # Bug fix: the original hard-coded num_classes=12 and silently
     # ignored the constructor argument.
     self.Vtrans = ViT(image_size=224,
                       patch_size=32,
                       num_classes=num_classes,
                       dim=1024,
                       depth=8,
                       heads=16,
                       mlp_dim=2048,
                       dropout=0.1,
                       emb_dropout=0.1,
                       channels=3)
Exemple #7
0
def test():
    """Smoke test: a ViT over 256x256 RGB input with 1000 classes must
    return one row of 1000 logits per image."""
    v = ViT(
        image_size = 256,
        patch_size = 32,
        num_classes = 1000,
        dim = 1024,
        depth = 6,
        heads = 16,
        mlp_dim = 2048,
        dropout = 0.1,
        emb_dropout = 0.1
    )

    img = torch.randn(1, 3, 256, 256)

    preds = v(img)
    # The assert message is only shown on *failure*, so it must describe
    # the failure (the original said 'correct logits outputted').
    assert preds.shape == (1, 1000), \
        f'expected logits of shape (1, 1000), got {tuple(preds.shape)}'
Exemple #8
0
def Objective(trial):
    """Optuna objective: sample ViT hyper-parameters, train for `epochs`,
    and return the final validation accuracy (also logged to wandb).

    Relies on module-level globals: device, gamma, epochs, train_loader,
    test_loader, train(), test().

    Args:
        trial: optuna.trial.Trial used for sampling and pruning.

    Raises:
        optuna.exceptions.TrialPruned: when the pruner cuts the trial short.
    """
    dim = trial.suggest_categorical('dim', [32, 64, 128])
    patch_size = 7  # fixed: 28/7 gives a 4x4 patch grid on MNIST
    depth = trial.suggest_categorical('depth', [8, 16, 32])
    heads = trial.suggest_categorical('heads', [8, 16, 32])
    mlp_dim = trial.suggest_categorical('mlp_dim', [128, 512, 1024])
    optimizer_name = trial.suggest_categorical("optimizer",
                                               ["Adam", "RMSprop"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    print('dim:', dim, 'mlp_dim:', mlp_dim, 'depth:', depth, 'heads:', heads)
    model = ViT(
        dim=dim,
        image_size=28,
        patch_size=patch_size,
        num_classes=10,
        depth=depth,  # number of transformer blocks
        heads=heads,  # number of attention heads
        mlp_dim=mlp_dim,
        channels=1,  # MNIST is grayscale
    )
    model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=gamma)

    for epoch in range(1, epochs + 1):
        train(model, criterion, device, train_loader, optimizer, epoch)
        val_acc = test(model, device, test_loader)
        scheduler.step()
        # (Removed a dead `if 0: torch.save(...)` block.)

        trial.report(val_acc, epoch)

        # Handle pruning based on the intermediate value.
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    wandb.log({'val_acc': val_acc})
    return val_acc
Exemple #9
0
 def __init__(self,
              image_size,
              patch_size,
              num_classes,
              dim=1024,
              depth=6,
              heads=16,
              mlp_dim=2048,
              dropout=0.1,
              emb_dropout=0.1):
     """Thin wrapper that forwards every constructor argument, unchanged,
     to a vit_pytorch ViT kept as ``self.vit``."""
     super(BaseVit, self).__init__()
     vit_config = {
         'image_size': image_size,
         'patch_size': patch_size,
         'num_classes': num_classes,
         'dim': dim,
         'depth': depth,
         'heads': heads,
         'mlp_dim': mlp_dim,
         'dropout': dropout,
         'emb_dropout': emb_dropout,
     }
     self.vit = ViT(**vit_config)
    def __init__(self,
                 width: int,
                 height: int,
                 action_dim: int,
                 motion_blur: int = 4):
        """Policy network: a small ViT maps a 4-channel 64x64 image to
        ``action_dim`` logits.

        Args:
            width: input image width (stored only; NOTE(review): the ViT
                below is hard-coded to image_size=64 -- confirm inputs are
                resized upstream).
            height: input image height (stored only, same caveat as width).
            action_dim: number of actions; used as the ViT's num_classes.
            motion_blur: number of stacked frames (default 4; stored only).
        """
        super(PolicyModelVIT, self).__init__()

        self.width = width
        self.height = height
        self.motion_blur = motion_blur

        # channels=4 happens to match the default motion_blur -- presumably
        # one channel per stacked frame, but it is NOT tied to the argument;
        # TODO confirm before passing motion_blur != 4.
        self.image_encoder = ViT(image_size=64,
                                 patch_size=8,
                                 num_classes=action_dim,
                                 dim=128,
                                 depth=2,
                                 channels=4,
                                 heads=3,
                                 mlp_dim=256,
                                 dropout=0,  # dropout disabled for this policy
                                 emb_dropout=0)
Exemple #11
0
        def __init__(self, file_path):
            """Build a modified ViT for (1, 962) inputs and restore its
            weights from ``file_path`` (best effort).

            Args:
                file_path: path to a torch checkpoint loaded onto CPU.
            """
            super(VisualTrans, self).__init__()

            self.file_path = file_path

            self.model = ViT_modified(
                n_classes=1,
                image_size=(1,
                            962),  # image size is a tuple of (height, width)
                patch_size=(1, 13),  # patch size is a tuple of (height, width)
                dim=16,
                depth=3,
                heads=16,
                mlp_dim=512,
                dropout=0.1,
                emb_dropout=0.1)

            state_dict = torch.load(self.file_path, map_location='cpu')

            # Best-effort load: mismatched checkpoints are reported, not
            # fatal. (Removed an unused `new_state_dict = OrderedDict()`.)
            try:
                self.model.load_state_dict(state_dict)
            except RuntimeError as e:
                print('Ignoring test_dataset_size "' + str(e) + '"')
#    dim=768,
##    seq_len=49+1,  # 7x7 patches + 1 cls-token
#    seq_len=4+1,  # 2x2 patches + 1 cls-token
##    depth=1,
#    depth=12,
##    heads=8,
#    heads=12,
#    k=256
#)

# ViT-B
model = ViT(
    dim=768,
    depth=12,
    heads=12,
    mlp_dim=3072,  # 4 * dim, the standard ViT-B MLP ratio
    image_size=128,
    patch_size=16,  # (128/16)^2 = 64 patches
    num_classes=8,
    channels=3,
).to(device)

## ViT-L
#model = ViT(
#    dim = 1024,
#    depth = 24,
#    heads = 16,
#    mlp_dim = 4096,
#    image_size=128,
#    patch_size=16,
#    num_classes=8,
#    channels=3,
    def __init__(self,
                 model_train='tf_efficientnetv2_b0',
                 num_classes=3,
                 diffaug_activate=False,
                 policy='color,translation',
                 aug=None):
        """Instantiate the classifier backbone selected by ``model_train``
        (stored as ``self.netD``), plus the loss criterion and augmentation
        helpers.

        Args:
            model_train: architecture name, dispatched over the elif chain
                below. NOTE(review): the default 'tf_efficientnetv2_b0'
                matches no branch, and there is no final ``else`` -- an
                unrecognized name leaves ``self.netD`` unset; confirm
                callers always pass a valid name (timm names go through
                model_train='timm').
            num_classes: number of output classes for the chosen backbone.
            diffaug_activate: stored flag (used elsewhere) for DiffAugment.
            policy: DiffAugment policy string, stored as ``self.policy``.
            aug: optional mixing augmentation ('gridmix' or 'cutmix');
                selects a mixing loss as ``self.criterion``.

        Reads the module-level ``cfg`` dict for pretrain flags, model
        sizes, loss selection, and MuAugment settings.
        """
        super().__init__()

        #############################################
        if model_train == 'efficientnet-b0':
            self.netD = EfficientNet.from_pretrained('efficientnet-b0',
                                                     num_classes=num_classes)
        elif model_train == 'efficientnet-b1':
            self.netD = EfficientNet.from_pretrained('efficientnet-b1',
                                                     num_classes=num_classes)
        elif model_train == 'efficientnet-b2':
            self.netD = EfficientNet.from_pretrained('efficientnet-b2',
                                                     num_classes=num_classes)
        elif model_train == 'efficientnet-b3':
            self.netD = EfficientNet.from_pretrained('efficientnet-b3',
                                                     num_classes=num_classes)
        elif model_train == 'efficientnet-b4':
            self.netD = EfficientNet.from_pretrained('efficientnet-b4',
                                                     num_classes=num_classes)
        elif model_train == 'efficientnet-b5':
            self.netD = EfficientNet.from_pretrained('efficientnet-b5',
                                                     num_classes=num_classes)
        elif model_train == 'efficientnet-b6':
            self.netD = EfficientNet.from_pretrained('efficientnet-b6',
                                                     num_classes=num_classes)
        elif model_train == 'efficientnet-b7':
            self.netD = EfficientNet.from_pretrained('efficientnet-b7',
                                                     num_classes=num_classes)

        elif model_train == 'mobilenetv3_small':
            from arch.mobilenetv3_arch import MobileNetV3
            self.netD = MobileNetV3(n_class=num_classes,
                                    mode='small',
                                    input_size=256)
        elif model_train == 'mobilenetv3_large':
            from arch.mobilenetv3_arch import MobileNetV3
            self.netD = MobileNetV3(n_class=num_classes,
                                    mode='large',
                                    input_size=256)

        elif model_train == 'resnet50':
            from arch.resnet_arch import resnet50
            self.netD = resnet50(num_classes=num_classes,
                                 pretrain=cfg['pretrain'])
        elif model_train == 'resnet101':
            from arch.resnet_arch import resnet101
            self.netD = resnet101(num_classes=num_classes,
                                  pretrain=cfg['pretrain'])
        elif model_train == 'resnet152':
            from arch.resnet_arch import resnet152
            self.netD = resnet152(num_classes=num_classes,
                                  pretrain=cfg['pretrain'])

        #############################################
        elif model_train == 'ViT':
            from vit_pytorch import ViT
            self.netD = ViT(image_size=256,
                            patch_size=32,
                            num_classes=num_classes,
                            dim=1024,
                            depth=6,
                            heads=16,
                            mlp_dim=2048,
                            dropout=0.1,
                            emb_dropout=0.1)

        elif model_train == 'DeepViT':
            from vit_pytorch.deepvit import DeepViT
            self.netD = DeepViT(image_size=256,
                                patch_size=32,
                                num_classes=num_classes,
                                dim=1024,
                                depth=6,
                                heads=16,
                                mlp_dim=2048,
                                dropout=0.1,
                                emb_dropout=0.1)

        #############################################

        elif model_train == 'RepVGG-A0':
            from arch.RepVGG_arch import create_RepVGG_A0
            self.netD = create_RepVGG_A0(deploy=False, num_classes=num_classes)

        elif model_train == 'RepVGG-A1':
            from arch.RepVGG_arch import create_RepVGG_A1
            self.netD = create_RepVGG_A1(deploy=False, num_classes=num_classes)

        elif model_train == 'RepVGG-A2':
            from arch.RepVGG_arch import create_RepVGG_A2
            self.netD = create_RepVGG_A2(deploy=False, num_classes=num_classes)

        elif model_train == 'RepVGG-B0':
            from arch.RepVGG_arch import create_RepVGG_B0
            self.netD = create_RepVGG_B0(deploy=False, num_classes=num_classes)

        elif model_train == 'RepVGG-B1':
            from arch.RepVGG_arch import create_RepVGG_B1
            self.netD = create_RepVGG_B1(deploy=False, num_classes=num_classes)

        elif model_train == 'RepVGG-B1g2':
            from arch.RepVGG_arch import create_RepVGG_B1g2
            self.netD = create_RepVGG_B1g2(deploy=False,
                                           num_classes=num_classes)

        elif model_train == 'RepVGG-B1g4':
            from arch.RepVGG_arch import create_RepVGG_B1g4
            self.netD = create_RepVGG_B1g4(deploy=False,
                                           num_classes=num_classes)

        elif model_train == 'RepVGG-B2':
            from arch.RepVGG_arch import create_RepVGG_B2
            self.netD = create_RepVGG_B2(deploy=False, num_classes=num_classes)

        elif model_train == 'RepVGG-B2g2':
            from arch.RepVGG_arch import create_RepVGG_B2g2
            self.netD = create_RepVGG_B2g2(deploy=False,
                                           num_classes=num_classes)

        elif model_train == 'RepVGG-B2g4':
            from arch.RepVGG_arch import create_RepVGG_B2g4
            self.netD = create_RepVGG_B2g4(deploy=False,
                                           num_classes=num_classes)

        elif model_train == 'RepVGG-B3':
            from arch.RepVGG_arch import create_RepVGG_B3
            self.netD = create_RepVGG_B3(deploy=False, num_classes=num_classes)

        elif model_train == 'RepVGG-B3g2':
            from arch.RepVGG_arch import create_RepVGG_B3g2
            self.netD = create_RepVGG_B3g2(deploy=False,
                                           num_classes=num_classes)

        elif model_train == 'RepVGG-B3g4':
            from arch.RepVGG_arch import create_RepVGG_B3g4
            self.netD = create_RepVGG_B3g4(deploy=False,
                                           num_classes=num_classes)

        #############################################

        elif model_train == 'squeezenet_1_0':
            from arch.squeezenet_arch import SqueezeNet
            self.netD = SqueezeNet(num_classes=num_classes, version='1_0')

        elif model_train == 'squeezenet_1_1':
            from arch.squeezenet_arch import SqueezeNet
            self.netD = SqueezeNet(num_classes=num_classes, version='1_1')
        #############################################
        elif model_train == 'vgg11':
            from arch.vgg_arch import create_vgg11
            self.netD = create_vgg11(num_classes, pretrained=cfg['pretrain'])
        elif model_train == 'vgg13':
            from arch.vgg_arch import create_vgg13
            self.netD = create_vgg13(num_classes, pretrained=cfg['pretrain'])
        elif model_train == 'vgg16':
            from arch.vgg_arch import create_vgg16
            self.netD = create_vgg16(num_classes, pretrained=cfg['pretrain'])
        elif model_train == 'vgg19':
            from arch.vgg_arch import create_vgg19
            self.netD = create_vgg19(num_classes, pretrained=cfg['pretrain'])

        #############################################
        elif model_train == 'SwinTransformer':
            from swin_transformer_pytorch import SwinTransformer

            self.netD = SwinTransformer(hidden_dim=96,
                                        layers=(2, 2, 6, 2),
                                        heads=(3, 6, 12, 24),
                                        channels=3,
                                        num_classes=num_classes,
                                        head_dim=32,
                                        window_size=8,
                                        downscaling_factors=(4, 2, 2, 2),
                                        relative_pos_embedding=True)

        elif model_train == 'effV2':
            if cfg['size'] == "s":
                from arch.efficientnetV2_arch import effnetv2_s
                self.netD = effnetv2_s(num_classes=num_classes)
            elif cfg['size'] == "m":
                from arch.efficientnetV2_arch import effnetv2_m
                self.netD = effnetv2_m(num_classes=num_classes)
            elif cfg['size'] == "l":
                from arch.efficientnetV2_arch import effnetv2_l
                self.netD = effnetv2_l(num_classes=num_classes)
            elif cfg['size'] == "xl":
                from arch.efficientnetV2_arch import effnetv2_xl
                self.netD = effnetv2_xl(num_classes=num_classes)

        elif model_train == 'x_transformers':
            from x_transformers import ViTransformerWrapper, Encoder
            self.netD = ViTransformerWrapper(image_size=cfg['image_size'],
                                             patch_size=cfg['patch_size'],
                                             num_classes=num_classes,
                                             attn_layers=Encoder(
                                                 dim=cfg['dim'],
                                                 depth=cfg['depth'],
                                                 heads=cfg['heads'],
                                             ))

        elif model_train == 'mobilevit':
            if cfg['model_size'] == "xxs":
                from arch.mobilevit_arch import mobilevit_xxs
                self.netD = mobilevit_xxs(num_classes=num_classes)
            elif cfg['model_size'] == "xs":
                from arch.mobilevit_arch import mobilevit_xs
                self.netD = mobilevit_xs(num_classes=num_classes)
            elif cfg['model_size'] == "x":
                from arch.mobilevit_arch import mobilevit_s
                self.netD = mobilevit_s(num_classes=num_classes)

        elif model_train == 'hrt':
            from arch.hrt_arch import HighResolutionTransformer
            self.netD = HighResolutionTransformer(num_classes)

        elif model_train == 'volo':
            if cfg['model_size'] == "volo_d1":
                from arch.volo_arch import volo_d1
                self.netD = volo_d1(pretrained=cfg['pretrain'],
                                    num_classes=num_classes)
            elif cfg['model_size'] == "volo_d2":
                from arch.volo_arch import volo_d2
                self.netD = volo_d2(pretrained=cfg['pretrain'],
                                    num_classes=num_classes)
            elif cfg['model_size'] == "volo_d3":
                from arch.volo_arch import volo_d3
                self.netD = volo_d3(pretrained=cfg['pretrain'],
                                    num_classes=num_classes)
            elif cfg['model_size'] == "volo_d4":
                from arch.volo_arch import volo_d4
                self.netD = volo_d4(pretrained=cfg['pretrain'],
                                    num_classes=num_classes)
            elif cfg['model_size'] == "volo_d5":
                from arch.volo_arch import volo_d5
                self.netD = volo_d5(pretrained=cfg['pretrain'],
                                    num_classes=num_classes)

        elif model_train == 'pvt_v2':
            if cfg['model_size'] == "pvt_v2_b0":
                from arch.pvt_v2_arch import pvt_v2_b0
                self.netD = pvt_v2_b0(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "pvt_v2_b1":
                from arch.pvt_v2_arch import pvt_v2_b1
                self.netD = pvt_v2_b1(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "pvt_v2_b2":
                from arch.pvt_v2_arch import pvt_v2_b2
                self.netD = pvt_v2_b2(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "pvt_v2_b3":
                from arch.pvt_v2_arch import pvt_v2_b3
                self.netD = pvt_v2_b3(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "pvt_v2_b4":
                from arch.pvt_v2_arch import pvt_v2_b4
                self.netD = pvt_v2_b4(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "pvt_v2_b5":
                from arch.pvt_v2_arch import pvt_v2_b5
                self.netD = pvt_v2_b5(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "pvt_v2_b2_li":
                from arch.pvt_v2_arch import pvt_v2_b2_li
                self.netD = pvt_v2_b2_li(pretrained=cfg['pretrain'],
                                         num_classes=num_classes)

        elif model_train == 'ConvMLP':
            if cfg['model_size'] == "convmlp_s":
                from arch.ConvMLP_arch import convmlp_s
                self.netD = convmlp_s(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "convmlp_m":
                from arch.ConvMLP_arch import convmlp_m
                self.netD = convmlp_m(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)
            elif cfg['model_size'] == "convmlp_l":
                from arch.ConvMLP_arch import convmlp_l
                self.netD = convmlp_l(pretrained=cfg['pretrain'],
                                      num_classes=num_classes)

        elif model_train == 'FocalTransformer':
            from arch.focal_transformer_arch import FocalTransformer
            self.netD = FocalTransformer(num_classes=num_classes)

        elif model_train == 'mobile_former':
            from arch.mobile_former_arch import MobileFormer, config_52, config_294, config_508
            if cfg['model_size'] == "config_52":
                self.netD = MobileFormer(config_52)
            elif cfg['model_size'] == "config_294":
                self.netD = MobileFormer(config_294)
            elif cfg['model_size'] == "config_508":
                self.netD = MobileFormer(config_508)

        elif model_train == 'poolformer':
            # NOTE(review): independent `if`s rather than elif -- harmless
            # because the model_size values are mutually exclusive.
            if cfg['model_size'] == "poolformer_s12":
                from arch.poolformer_arch import poolformer_s12
                self.netD = poolformer_s12(pretrained=True,
                                           num_classes=num_classes)
            if cfg['model_size'] == "poolformer_s24":
                from arch.poolformer_arch import poolformer_s24
                self.netD = poolformer_s24(pretrained=True,
                                           num_classes=num_classes)
            if cfg['model_size'] == "poolformer_s36":
                from arch.poolformer_arch import poolformer_s36
                self.netD = poolformer_s36(pretrained=True,
                                           num_classes=num_classes)
            if cfg['model_size'] == "poolformer_m36":
                from arch.poolformer_arch import poolformer_m36
                self.netD = poolformer_m36(pretrained=True,
                                           num_classes=num_classes)
            if cfg['model_size'] == "poolformer_m48":
                from arch.poolformer_arch import poolformer_m48
                self.netD = poolformer_m48(pretrained=True,
                                           num_classes=num_classes)

        elif model_train == 'timm':
            import timm
            self.netD = timm.create_model(cfg['model_choise'],
                                          num_classes=num_classes,
                                          pretrained=True)

        #weights_init(self.netD, 'kaiming') #only use this if there is no pretrain

        if aug == 'gridmix':
            from GridMixupLoss import GridMixupLoss
            self.criterion = GridMixupLoss(alpha=(0.4, 0.7),
                                           hole_aspect_ratio=1.,
                                           crop_area_ratio=(0.5, 1),
                                           crop_aspect_ratio=(0.5, 2),
                                           n_holes_x=(2, 6))
        elif aug == 'cutmix':
            from cutmix import cutmix
            self.criterion = cutmix(alpha=(0.4, 0.7),
                                    hole_aspect_ratio=1.,
                                    crop_area_ratio=(0.5, 1),
                                    crop_aspect_ratio=(0.5, 2),
                                    n_holes_x=(2, 6))

        self.aug = aug

        # NOTE(review): this can silently replace the gridmix/cutmix
        # criterion chosen above -- confirm the precedence is intended.
        if cfg['loss'] == 'CenterLoss':
            from centerloss import CenterLoss
            self.criterion = CenterLoss(num_classes=num_classes,
                                        feat_dim=2,
                                        use_gpu=True)
        elif cfg['loss'] == 'normal':
            self.criterion = torch.nn.CrossEntropyLoss()

        # Running metrics collected during train/validation loops.
        self.accuracy = []
        self.losses = []
        self.diffaug_activate = diffaug_activate
        self.accuracy_val = []
        self.losses_val = []

        self.policy = policy
        self.iter_check = 0

        if cfg['aug'] == 'MuAugment':
            rand_augment = BatchRandAugment(N_TFMS=3,
                                            MAGN=3,
                                            mean=cfg['means'],
                                            std=cfg['std'])
            self.mu_transform = MuAugment(rand_augment,
                                          N_COMPS=4,
                                          N_SELECTED=2)
import torch
from vit_pytorch import ViT

# Build a 7-class ViT over 256x256 RGB input and run one random image.
v = ViT(image_size=256,
        patch_size=32,
        num_classes=7,
        dim=1024,
        depth=6,
        heads=16,
        mlp_dim=2048,
        dropout=0.1,
        emb_dropout=0.1)

# One random 3-channel 256x256 image.
img = torch.randn(1, 3, 256, 256)

preds = v(img)  # (1, 7) -- the original comment said (1, 1000), but num_classes=7

print(preds)
Exemple #15
0
def main():
    """Command-line entry point: train/evaluate a ViT (or CNN) on MNIST.

    Supports checkpoint save/restore, optional input normalization,
    model choice (--cnn vs. transformer) and kernel/feature-map
    visualization of a trained model.
    """
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--image',
                        default='dataset/test/0_000.png',
                        help='image to be classified')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-3,
                        metavar='S',
                        help='learning rate (default: 1e-3)')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--layer-num',
                        type=int,
                        default=0,
                        metavar='N',
                        help='which layer to visualize (default: 0)')
    parser.add_argument(
        '--feature-num',
        type=int,
        default=0,
        metavar='N',
        help='which feature of a layer to visualize (default: 0)')
    parser.add_argument('--train',
                        action='store_true',
                        default=False,
                        help='train the model (default: False)')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='save the current model (default: False)')
    parser.add_argument('--restore-model',
                        default=None,
                        help='restore & eval this model file (default: False)')
    parser.add_argument('--normalize',
                        action='store_true',
                        default=False,
                        help='normalize input dataset (default: False)')
    parser.add_argument(
        '--cnn',
        action='store_true',
        default=False,
        help='use cnn model instead of transformer (default: False)')
    parser.add_argument('--visualize',
                        action='store_true',
                        default=False,
                        help='plot kernel and feature maps (default: False)')

    args = parser.parse_args()
    use_cuda = torch.cuda.is_available()

    # Extra loader workers / pinned memory only pay off with a GPU present.
    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

    if args.normalize:
        # Standard MNIST mean/std normalization constants.
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
    else:
        transform = transforms.Compose([transforms.ToTensor()])

    x_train = datasets.MNIST(root='./data',
                             train=True,
                             download=True,
                             transform=transform)

    x_test = datasets.MNIST(root='./data',
                            train=False,
                            download=True,
                            transform=transform)

    DataLoader = torch.utils.data.DataLoader
    train_loader = DataLoader(x_train,
                              shuffle=True,
                              batch_size=args.batch_size,
                              **kwargs)

    test_loader = DataLoader(x_test,
                             shuffle=False,
                             batch_size=args.batch_size,
                             **kwargs)

    # fix: dropped a redundant second torch.cuda.is_available() call here --
    # use_cuda is already computed above.
    device = torch.device("cuda" if use_cuda else "cpu")
    if args.cnn:
        model = CNNModel().to(device)
    else:
        # Tiny ViT for 28x28 single-channel digits: 2x2 grid of 14px patches.
        model = ViT(
            image_size=28,
            patch_size=14,
            num_classes=10,
            dim=128,
            depth=6,
            heads=8,
            mlp_dim=128,
            channels=1,
        ).to(device)

    if torch.cuda.device_count() > 1:
        print("Available GPUs:", torch.cuda.device_count())
        model = nn.DataParallel(model)
    print("Model:", model)
    print("Device:", device)
    optimizer = optim.Adam(model.parameters())

    start_time = datetime.datetime.now()
    best_top1 = 0
    best_top5 = 0
    if args.restore_model is not None:
        # Evaluate a previously saved checkpoint before (optionally) training,
        # so its scores become the baseline for "new best".
        model.load_state_dict(torch.load(args.restore_model))
        best_top1, best_top5 = test(args, model, device, test_loader)
        print("Best Top 1: %0.2f%%, Top 5: %0.2f%%" % (best_top1, best_top5))

    if args.train:
        for epoch in range(1, args.epochs + 1):
            top1, top5 = train(args, model, device, train_loader, test_loader,
                               optimizer, epoch)
            if top1 > best_top1:
                print("New best Top 1: %0.2f%%, Top 5: %0.2f%%" % (top1, top5))
                best_top1 = top1
                best_top5 = top5
                if args.save_model:
                    # Checkpoint only on an improvement, named per backbone.
                    filename = "cnn-mnist.pth" if args.cnn else "transformer-mnist.pth"
                    torch.save(model.state_dict(), filename)
                    print("Saving best model on file: ", filename)

        print("Best Top 1: %0.2f%%, Top 5: %0.2f%% in %d epochs" %
              (best_top1, best_top5, args.epochs))

    elapsed_time = datetime.datetime.now() - start_time
    print("Elapsed time (train): %s" % elapsed_time)

    if args.visualize:
        viz_features(args, model)
import numpy as np
import matplotlib.pyplot as plt

# Run-scale knobs for this training script.
batch_size = 256  # folder num # image_num = batch_size * 32
num_workers = 8  # False
epoch = 1000
# Timestamped TensorBoard run directory, e.g. ./log/~20210101~120000/train.
# NOTE(review): `t` is presumably the stdlib `time` module aliased elsewhere
# in the file, and SummaryWriter the torch.utils.tensorboard one -- confirm.
dir_name = t.strftime('~%Y%m%d~%H%M%S', t.localtime(t.time()))
log_train = './log/' + dir_name + '/train'
writer = SummaryWriter(log_train)

# Compact ViT (halved image size / reduced dim vs. the commented defaults),
# single-channel input, binary output, heavier 0.3 dropout.
v = ViT(
    image_size=128,  # 256
    patch_size=8,  # 32
    num_classes=2,
    dim=512,  # 1024
    depth=2,
    heads=4,
    mlp_dim=2048,
    channels=1,
    dropout=0.3,
    emb_dropout=0.3)  # small layers

# pytorch_total_params = sum(p.numel() for p in v.parameters())
# print(pytorch_total_params)
# pass

##### Hyperparams #####
# bce = nn.BCELoss()
criterion = nn.CrossEntropyLoss()
sigmoid = nn.Sigmoid()
opt = torch.optim.Adam(v.parameters(), lr=3e-4)
    model_name = args.model.lower()

    if model_name == "resnet20":
        model = resnet.resnet20()
    elif model_name == "resnet32":
        model = resnet.resnet32()
    elif model_name == "resnet44":
        model = resnet.resnet44()
    elif model_name == "resnet56":
        model = resnet.resnet56()
    elif model_name == "resnet110":
        model = resnet.resnet110()
    elif model_name == "vit":
        #hidden=256,very BAD!!!
        #lr=0.001 nearly same
        model = ViT(image_size = 32,patch_size = 4,num_classes = nClass,dim = 21,depth = 6,heads = 3,ff_hidden = 128,dropout = 0,emb_dropout = 0.1)    
        # model = ImageTransformer(image_size=32, patch_size=4, num_classes=nClass, channels=3,dim=64, depth=6, heads=8, mlp_dim=128)          #
        # model = ViT(image_size = 256,patch_size = 32,num_classes = 1000,dim = 1024,depth = 6,eads = 16,mlp_dim = 2048,dropout = 0.1,emb_dropout = 0.1)
        #24 overfit
    elif model_name == "distiller":
        teacher = resnet50(pretrained = True)
        teacher.cuda()
        model = DistillableViT(image_size = 32,patch_size = 4,num_classes = nClass,dim = 64,depth = 6,heads = 8,mlp_dim = 128,dropout = 0.1,emb_dropout = 0.1)
        distiller = DistillWrapper(student = model,teacher = teacher,temperature = 3,alpha = 0.5)
    elif model_name == "lamlay":
        args.batch_size = 128;          args.weight_decay=0.0001
        # model = lambda_resnet26()
        model = LambdaResNet18()
        args.log_dir=f"./logs/lamlay/"
    elif model_name == "jaggi":
        VoT_config['use_attention'] = config.self_attention
Exemple #18
0
def main():
    """Evaluate a saved (spherical) ViT checkpoint on a test split.

    Loads the model named by --resume, runs inference over the chosen
    dataset/--set, and prints a confusion matrix plus a classification
    report.
    """

    parser = argparse.ArgumentParser(description='ViT')
    parser.add_argument('--data_dir', default='data/sph_dogs_vs_cats')
    parser.add_argument('--dataset', default='dvsc')
    parser.add_argument('--resume', default='dvsc-sgd-regularmodel_last.pth')
    parser.add_argument('--set', default='test')
    parser.add_argument('--mode', default='regular')
    parser.add_argument('--batch', default=8)
    parser.add_argument('--cuda', default=True)
    args = parser.parse_args()

    os.system('mkdir -p weights')

    dataset = {'smnist': SMNIST, 'dvsc': DVSC}

    # Per-dataset geometry: resolution, patch size, class count and the
    # spherical sampling level used by ViT_sphere.
    if args.dataset == 'smnist':
        image_size = 60
        patch_size = 10
        num_classes = 10
        samp = 6
    elif args.dataset == 'dvsc':
        image_size = 384
        patch_size = 32
        num_classes = 2
        samp = 12
    else:
        # fix: an unknown dataset previously fell through with image_size
        # etc. unbound, crashing later with a confusing NameError.
        raise ValueError('unknown dataset: %s' % args.dataset)

    if args.mode == 'normal':
        model = ViT(
            image_size=image_size,
            patch_size=patch_size,
            num_classes=num_classes,
            dim=512,
            depth=4,
            heads=8,
            mlp_dim=512,
            dropout=0.1,
            emb_dropout=0.1
        )
    else:
        model = ViT_sphere(
            image_size=image_size,
            patch_size=patch_size,
            num_classes=num_classes,
            dim=512,
            depth=4,
            heads=8,
            mlp_dim=512,
            base_order=1,
            mode=args.mode,  # face, vertex and regular
            samp=samp,
            dropout=0.1,
            emb_dropout=0.1
        )

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])

    print("Trainable parameters", params)

    path = 'weights/'
    model = load_model(model, os.path.join(path, args.resume))
    cuda = args.cuda
    batch = args.batch

    test_data = dataset[args.dataset](args.data_dir, args.set, image_size,
                                      image_size, None)
    test_loader = DataLoader(dataset=test_data, batch_size=batch, shuffle=False)

    if cuda:
        model = model.cuda()
    model.eval()

    P = np.array([])  # predicted class ids
    T = np.array([])  # ground-truth class ids

    #df = pd.read_csv("dvsc.csv")

    # Inference only: no_grad avoids building autograd graphs and keeps
    # memory flat across the loop.
    with torch.no_grad():
        for i, data in enumerate(tqdm(test_loader)):
            img, target = data
            if cuda:
                img = img.cuda()
                target = target.cuda()
            preds = model(img)
            probabilities = torch.nn.functional.softmax(preds, dim=1)
            preds = torch.argmax(probabilities, dim=1)
            P = np.concatenate([P, preds.cpu().numpy()])
            T = np.concatenate([T, target.cpu().numpy()])

    # fix: sklearn expects (y_true, y_pred) -- ground truth T goes first.
    # The previous (P, T) order transposed the confusion matrix and swapped
    # precision/recall in the report.
    confusion = confusion_matrix(T, P)

    #df['pred_class'] = P
    #df.to_csv('dvsc_p_regular.csv')

    print('Confusion Matrix\n')
    print(confusion)

    print('\nClassification Report\n')
    print(classification_report(T, P, target_names=test_data.category))
Exemple #19
0
gamma = 0.7
seed = 42

set_random_seeds(seed)

# Linformer attention stack for the efficient ViT below:
# 224/32 = 7x7 = 49 patches plus one CLS token.
efficient_transformer = Linformer(
    dim=128,
    seq_len=49 + 1,  # 7x7 patches + 1 cls-token
    depth=12,
    heads=8,
    k=64)

### change channels=6 for 6 input plans ####
# NOTE(review): this ViT takes a `transformer=` argument, so it is
# presumably vit_pytorch.efficient.ViT -- confirm against the file's imports.
l_model = ViT(dim=128,
              image_size=224,
              patch_size=32,
              num_classes=2,
              channels=6,
              transformer=efficient_transformer)

### change the class __init__ function to have more plans ###
# Stock full-attention ViT built with the library's reference settings.
v_model = ViT(image_size=256,
              patch_size=32,
              num_classes=1000,
              dim=1024,
              depth=6,
              heads=16,
              mlp_dim=2048,
              dropout=0.1,
              emb_dropout=0.1)

x = torch.randn(1, 6, 224, 224)  # can be any channels
 def to_vit(self):
     """Return a plain ViT carrying this model's trained weights.

     Rebuilds a ViT from the constructor arguments saved on this instance
     and copies the current state dict into it.
     """
     plain = ViT(*self.args, **self.kwargs)
     plain.load_state_dict(self.state_dict())
     return plain
def main():
    """Export a trained image classifier checkpoint to ONNX.

    Builds the architecture named by --model_train, loads weights from
    --model_path and writes the ONNX graph (opset 11) to --output_path.
    """
    # options
    parser = argparse.ArgumentParser()
    parser.add_argument('--model_train', type=str, required=True)
    parser.add_argument('--model_path', type=str, required=True)
    parser.add_argument('--num_classes', type=int, required=True)
    parser.add_argument('--output_path', type=str, required=True)
    args = parser.parse_args()

    # One branch per supported backbone; heavy architecture modules are
    # imported lazily so only the selected one must be installed.
    if args.model_train in ('efficientnet-b0', 'efficientnet-b1',
                            'efficientnet-b2', 'efficientnet-b3',
                            'efficientnet-b4', 'efficientnet-b5',
                            'efficientnet-b6', 'efficientnet-b7'):
        netD = EfficientNet.from_pretrained(args.model_train,
                                            num_classes=args.num_classes)

    elif args.model_train == 'mobilenetv3_small':
        from arch.mobilenetv3_arch import MobileNetV3
        netD = MobileNetV3(n_class=args.num_classes, mode='small',
                           input_size=256)
    elif args.model_train == 'mobilenetv3_large':
        from arch.mobilenetv3_arch import MobileNetV3
        netD = MobileNetV3(n_class=args.num_classes, mode='large',
                           input_size=256)

    elif args.model_train == 'resnet50':
        from arch.resnet_arch import resnet50
        netD = resnet50(num_classes=args.num_classes, pretrain=True)
    elif args.model_train == 'resnet101':
        from arch.resnet_arch import resnet101
        netD = resnet101(num_classes=args.num_classes, pretrain=True)
    elif args.model_train == 'resnet152':
        from arch.resnet_arch import resnet152
        netD = resnet152(num_classes=args.num_classes, pretrain=True)

    #############################################
    elif args.model_train == 'ViT':
        from vit_pytorch import ViT
        netD = ViT(
            image_size=256,
            patch_size=32,
            num_classes=args.num_classes,
            dim=1024,
            depth=6,
            heads=16,
            mlp_dim=2048,
            dropout=0.1,
            emb_dropout=0.1
        )

    elif args.model_train == 'DeepViT':
        from vit_pytorch.deepvit import DeepViT
        netD = DeepViT(
            image_size=256,
            patch_size=32,
            num_classes=args.num_classes,
            dim=1024,
            depth=6,
            heads=16,
            mlp_dim=2048,
            dropout=0.1,
            emb_dropout=0.1
        )

    #############################################

    # fix: the RepVGG branches referenced `model_train`, `self.netD` and
    # `num_classes` -- none of which exist in this function (copied from a
    # class context) -- so every variant raised NameError. The thirteen
    # near-identical branches collapse into one dispatch on the factory
    # name ('RepVGG-B1g2' -> create_RepVGG_B1g2, etc.).
    elif args.model_train.startswith('RepVGG-'):
        import arch.RepVGG_arch as repvgg_arch
        variant = args.model_train.split('-', 1)[1]  # e.g. 'A0', 'B1g2'
        try:
            factory = getattr(repvgg_arch, 'create_RepVGG_' + variant)
        except AttributeError:
            raise ValueError('unknown RepVGG variant: %s' % args.model_train)
        netD = factory(deploy=False, num_classes=args.num_classes)

    #############################################

    elif args.model_train == 'squeezenet_1_0':
        from arch.squeezenet_arch import SqueezeNet
        netD = SqueezeNet(num_classes=args.num_classes, version='1_0')

    elif args.model_train == 'squeezenet_1_1':
        from arch.squeezenet_arch import SqueezeNet
        netD = SqueezeNet(num_classes=args.num_classes, version='1_1')
    #############################################
    # fix: the vgg branches passed an undefined bare `num_classes`.
    elif args.model_train == 'vgg11':
        from arch.vgg_arch import create_vgg11
        netD = create_vgg11(args.num_classes, pretrained=True)
    elif args.model_train == 'vgg13':
        from arch.vgg_arch import create_vgg13
        netD = create_vgg13(args.num_classes, pretrained=True)
    elif args.model_train == 'vgg16':
        from arch.vgg_arch import create_vgg16
        netD = create_vgg16(args.num_classes, pretrained=True)
    elif args.model_train == 'vgg19':
        from arch.vgg_arch import create_vgg19
        netD = create_vgg19(args.num_classes, pretrained=True)

    #############################################
    elif args.model_train == 'SwinTransformer':
        from swin_transformer_pytorch import SwinTransformer

        netD = SwinTransformer(
            hidden_dim=96,
            layers=(2, 2, 6, 2),
            heads=(3, 6, 12, 24),
            channels=3,
            num_classes=args.num_classes,
            head_dim=32,
            window_size=8,
            downscaling_factors=(4, 2, 2, 2),
            relative_pos_embedding=True
        )

    else:
        # fix: an unrecognized name previously fell through and crashed
        # later on an unbound `netD`.
        raise ValueError('unknown model_train: %s' % args.model_train)

    from torch.autograd import Variable

    import torch.onnx
    import torch

    # Tracing input -- don't set the resolution too high, export can run
    # out of RAM.
    dummy_input = Variable(torch.randn(1, 3, 256, 256))
    state_dict = torch.load(args.model_path)
    print("Loaded model from model path into state_dict.")

    netD.load_state_dict(state_dict)
    torch.onnx.export(netD, dummy_input, args.output_path, opset_version=11)
    print("Done.")
Exemple #22
0
# Paths to pre-extracted DFDC (DeepFake Detection Challenge) frame folders.
root_path = '/home/ubuntu/dataset/dfdc_image/train/dfdc_train_part_10/'
test_path = '/home/ubuntu/dataset/dfdc_image/test/'
batch_size = 10 #folder num # image_num = batch_size * 32
num_workers= 4
epoch = 100
# Timestamped TensorBoard run directory, e.g. ./log/~20210101~120000/train.
# NOTE(review): `t` is presumably the stdlib `time` module aliased elsewhere.
dir_name = t.strftime('~%Y%m%d~%H%M%S', t.localtime(t.time()))
log_train = './log/' + dir_name + '/train'
writer = SummaryWriter(log_train)

# Full-size ViT: 256px input / 32px patches (8x8 grid), 1024-dim tokens,
# binary real-vs-fake head, heavier 0.3 dropout.
v = ViT(
    image_size = 256,
    patch_size = 32,
    num_classes = 2,
    dim = 1024,
    depth = 6,
    heads = 8,
    mlp_dim = 2048,
    dropout = 0.3,
    emb_dropout = 0.3
)
def count_parameters(model):
    """Return the number of trainable (requires_grad) parameters in *model*."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total
# Report model size, then set up losses/optimizer and move the model to GPU.
print(count_parameters(v))
criterion = nn.CrossEntropyLoss()
bce = nn.BCELoss()
sigmoid = nn.Sigmoid()
opt = torch.optim.Adam(v.parameters(), lr=3e-4)
#opt = torch.optim.SGD(v.parameters(), lr=3e-4)
v.cuda()
def main():
    """Train a (spherical) ViT classifier and checkpoint/plot its progress.

    Saves the best-accuracy, best-loss and final models under weights/ and
    writes learning-curve and validation-accuracy plots.
    """

    parser = argparse.ArgumentParser(description='ViT')
    parser.add_argument('--data_dir', default='data/sph_dogs_vs_cats')
    parser.add_argument('--dataset', default='dvsc')
    parser.add_argument('--exp_id', default='sdvsc-adam')
    parser.add_argument('--mode', default='normal')
    parser.add_argument('--batch', default=128)
    parser.add_argument('--epochs', default=10)
    parser.add_argument('--cuda', default=True)
    parser.add_argument('--optim', default='SGD')
    args = parser.parse_args()

    os.system('mkdir -p weights')

    dataset = {'smnist': SMNIST, 'dvsc': DVSC}
    # Per-dataset geometry: resolution, patch size, class count, sphere
    # sampling level for ViT_sphere.
    if args.dataset == 'smnist':
        image_size = 60
        patch_size = 10
        num_classes = 10
        samp = 6
    elif args.dataset == 'dvsc':
        image_size = 384
        patch_size = 32
        num_classes = 2
        samp = 12
    else:
        # fix: an unknown dataset previously left image_size etc. unbound.
        raise ValueError('unknown dataset: %s' % args.dataset)

    if args.mode == 'normal':
        model = ViT(image_size=image_size,
                    patch_size=patch_size,
                    num_classes=num_classes,
                    dim=512,
                    depth=4,
                    heads=8,
                    mlp_dim=512,
                    dropout=0.1,
                    emb_dropout=0.1)
    else:
        model = ViT_sphere(
            image_size=image_size,
            patch_size=patch_size,
            num_classes=num_classes,
            dim=512,
            depth=4,
            heads=8,
            mlp_dim=512,
            base_order=1,
            mode=args.mode,  # face, vertex and regular
            samp=samp,
            dropout=0.1,
            emb_dropout=0.1)

    model_parameters = filter(lambda p: p.requires_grad, model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])

    print("Trainable parameters", params)

    cuda = args.cuda
    epochs = args.epochs
    batch = args.batch
    path = 'weights/'

    train_data = dataset[args.dataset](args.data_dir, 'train', image_size,
                                       image_size, None)
    valid_data = dataset[args.dataset](args.data_dir, 'valid', image_size,
                                       image_size, None)

    train_loader = DataLoader(dataset=train_data,
                              batch_size=batch,
                              shuffle=True)
    valid_loader = DataLoader(dataset=valid_data,
                              batch_size=batch,
                              shuffle=True)

    if cuda:
        model = model.cuda()
    model.train()
    if args.optim == 'SGD':
        optimizer = optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
    else:
        optimizer = optim.Adam(model.parameters(), lr=1e-3)  #, momentum=0.9)

    cla_loss = torch.nn.CrossEntropyLoss()

    valid_loss = 1000
    valid_acc = 0

    print("Training Start")
    T_L = []  # mean training loss per epoch
    V_L = []  # mean validation loss per epoch
    V_a = []  # validation accuracy (%) per epoch
    # fix: the epoch index was `i`, shadowed by the inner batch loops' `i`.
    for epoch_idx in range(epochs):
        print("Epoch", epoch_idx + 1)
        model.train()
        train_losses = []
        for data in tqdm(train_loader):
            img, target = data
            if cuda:
                img = img.cuda()
                target = target.cuda()
            preds = model(img)
            output = cla_loss(preds, target)
            train_losses.append(output.cpu().item())
            output.backward()
            optimizer.step()
            optimizer.zero_grad()

        T_L.append(np.mean(train_losses))
        print("train loss:", np.mean(train_losses))

        sum_acc = 0
        total = len(valid_data)
        model.eval()
        # fix: validation losses used to be appended to the same list as the
        # training losses, so the reported/checkpointed "val loss" was a
        # train+val mixture; they now accumulate separately.
        val_losses = []
        with torch.no_grad():  # inference only -- no autograd graphs
            for data in tqdm(valid_loader):
                img, target = data
                if cuda:
                    img = img.cuda()
                    target = target.cuda()
                preds = model(img)
                val_losses.append(cla_loss(preds, target).item())
                probabilities = torch.nn.functional.softmax(preds, dim=1)
                preds = torch.argmax(probabilities, dim=1)
                acc = torch.sum(
                    torch.where(preds == target,
                                torch.tensor(1, device=preds.device),
                                torch.tensor(0, device=preds.device)))
                sum_acc += acc

        v_l = np.mean(val_losses)
        v_a = sum_acc.item() / total * 100

        if v_a > valid_acc:
            # New best validation accuracy -> checkpoint.
            valid_acc = v_a
            torch.save(
                {
                    'epoch': epochs,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, path + args.exp_id + 'model_acc.pth')

        if v_l < valid_loss:
            # New best validation loss -> checkpoint.
            valid_loss = v_l
            torch.save(
                {
                    'epoch': epochs,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                }, path + args.exp_id + 'model_loss.pth')

        V_L.append(v_l)
        V_a.append(v_a)
        print("val loss:", v_l)
        print("val acc:", v_a)

    print(T_L)
    plt.plot(T_L, label='Total_loss', color='blue')
    plt.plot(V_L, label='Valid_loss', color='red')
    plt.legend(loc="upper left")
    plt.xlabel("num of epochs")
    plt.ylabel("loss")
    plt.savefig(path + args.exp_id + 'Learning_Curves.png')
    plt.clf()
    plt.plot(V_a, label='Valid_acc', color='cyan')
    plt.legend(loc="upper left")
    plt.xlabel("num of epochs")
    plt.ylabel("accuracy")
    plt.savefig(path + args.exp_id + 'Val_acc.png')

    torch.save(
        {
            'epoch': epochs,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, path + args.exp_id + 'model_last.pth')
Exemple #24
0
            print(dic, print_size(dict_[dic]))
    else:
        return str(dict_.shape)


# print(pretain_tf_model.keys())
# ViT-Base/16 geometry for converting the TF checkpoint's weights.
input_size = 224
patch_size = 16
num_layers = 12
# print(pretain_tf_model.keys())
# print_size(pretain_tf_model['pre_logits'])

v = ViT(image_size=input_size,
        patch_size=patch_size,
        num_classes=1000,
        # fix: `dim` is a required ViT argument and was missing (TypeError
        # at construction). 768 is ViT-Base's hidden size, matching the
        # heads=12 / mlp_dim=3072 / depth=12 configuration here.
        dim=768,
        depth=num_layers,
        heads=12,
        mlp_dim=3072,
        dropout=0.1,
        emb_dropout=0.1)

print("Model's state_dict:")
for param_tensor in v.state_dict():
    print(param_tensor, "\t", v.state_dict()[param_tensor].size())

## copy embedding
tf_dict = {}

# TF stores the patch-embedding conv kernel in HWIO layout; transpose to
# PyTorch's OIHW before copying.
embedding_weight_shape = pretain_tf_model['embedding']['kernel'].shape
embedding_weight = np.array(
    jnp.transpose(pretain_tf_model['embedding']['kernel'], (3, 2, 0, 1)))
# embedding_weight = pretain_tf_model['embedding']['kernel'].reshape([embedding_weight_shape[3],embedding_weight_shape[2],embedding_weight_shape[1],embedding_weight_shape[0]])
def main():
    """Train a ViT (or CNN) on MNIST for --epochs epochs.

    Reports the best top-1/top-5 accuracy seen and optionally saves the
    final model state to mnist.pth.
    """
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    parser.add_argument('--lr',
                        type=float,
                        default=0.01,
                        metavar='S',
                        help='learning rate')
    parser.add_argument('--batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        metavar='N',
                        help='number of epochs to train (default: 10)')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=False,
                        help='For Saving the current Model')
    parser.add_argument('--normalize',
                        action='store_true',
                        default=False,
                        help='normalize input dataset')
    parser.add_argument('--cnn',
                        action='store_true',
                        default=False,
                        help='use cnn model')

    args = parser.parse_args()
    use_cuda = torch.cuda.is_available()

    # Extra loader workers / pinned memory only pay off with a GPU present.
    kwargs = {'num_workers': 4, 'pin_memory': True} if use_cuda else {}

    if args.normalize:
        # Standard MNIST mean/std normalization constants.
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
    else:
        transform = transforms.Compose([transforms.ToTensor()])

    x_train = datasets.MNIST(root='./data',
                             train=True,
                             download=True,
                             transform=transform)

    x_test = datasets.MNIST(root='./data',
                            train=False,
                            download=True,
                            transform=transform)

    DataLoader = torch.utils.data.DataLoader
    train_loader = DataLoader(x_train,
                              shuffle=True,
                              batch_size=args.batch_size,
                              **kwargs)

    test_loader = DataLoader(x_test,
                             shuffle=False,
                             batch_size=args.batch_size,
                             **kwargs)

    device = torch.device("cuda" if use_cuda else "cpu")
    if args.cnn:
        model = CNNModel().to(device)
    else:
        # Tiny ViT for 28x28 single-channel digits: 2x2 grid of 14px patches.
        model = ViT(
            image_size=28,
            patch_size=14,
            num_classes=10,
            dim=128,
            depth=6,
            heads=8,
            mlp_dim=128,
            channels=1,
        ).to(device)

    if torch.cuda.device_count() > 1:
        print("Available GPUs:", torch.cuda.device_count())
        model = nn.DataParallel(model)
    print("Model:", model)
    print("Device:", device)
    optimizer = optim.Adam(model.parameters())

    start_time = datetime.datetime.now()
    best_top1 = 0
    best_top5 = 0
    # Track the best epoch's scores; `train` is expected to return
    # (top1, top5) for the held-out test set each epoch.
    for epoch in range(1, args.epochs + 1):
        top1, top5 = train(args, model, device, train_loader, test_loader,
                           optimizer, epoch)
        if top1 > best_top1:
            print("New best Top 1: %0.2f%%, Top 5: %0.2f%%" % (top1, top5))
            best_top1 = top1
            best_top5 = top5
    elapsed_time = datetime.datetime.now() - start_time
    print("Elapsed time (train): %s" % elapsed_time)
    print("Best Top 1: %0.2f%%, Top 5: %0.2f%%" % (best_top1, best_top5))

    if args.save_model:
        # NOTE(review): saves the *final* state, not the best-epoch state.
        torch.save(model.state_dict(), "mnist.pth")
Exemple #26
0
    if args.version == 2:
        args.cos = True
        args.moco_t = 0.2
    if args.version == 3:
        args.cos = True
        args.symmetric = True

    print(args)

    vit = ViT(
        image_size=32,
        patch_size=4,
        num_classes=args.moco_dim,
        # dim = 256,
        # depth = 4,
        # heads = 12,
        # mlp_dim = 512,
        dim=256,
        depth=3,
        heads=8,
        mlp_dim=384,
        dropout=0.1,
        emb_dropout=0.1)

    model = MoCo(dim=args.moco_dim,
                 K=args.moco_k,
                 m=args.moco_m,
                 T=args.moco_t,
                 ver=args.version,
                 arch=args.arch,
                 bn_splits=args.bn_splits,
                 symmetric=args.symmetric,