def __init__(self, *, dim_text=512, dim_image=512, dim_latent=512,
             num_text_tokens=10000, num_visual_tokens=512,
             text_enc_depth=6, visual_enc_depth=6,
             text_seq_len=256, visual_seq_len=1024,
             text_heads=8, visual_heads=8, vae=None):
    super().__init__()
    self.text_emb = nn.Embedding(num_text_tokens, dim_text)
    self.text_pos_emb = nn.Embedding(text_seq_len, dim_text)
    self.text_transformer = Encoder(dim=dim_text, depth=text_enc_depth, heads=text_heads)
    self.to_text_latent = nn.Linear(dim_text, dim_latent, bias=False)

    self.visual_emb = nn.Embedding(num_visual_tokens, dim_image)
    self.visual_pos_emb = nn.Embedding(visual_seq_len, dim_image)
    self.visual_transformer = Encoder(dim=dim_image, depth=visual_enc_depth, heads=visual_heads)
    self.to_visual_latent = nn.Linear(dim_image, dim_latent, bias=False)

    self.temperature = nn.Parameter(torch.tensor(1.))

    self.vae = vae
    if exists(self.vae):
        # if a VAE is provided, share its codebook as the visual token embedding
        self.visual_emb = vae.codebook
def __init__(
    self,
    *,
    dim_text = 512,
    dim_image = 512,
    dim_latent = 512,
    num_text_tokens = 10000,
    text_enc_depth = 6,
    text_seq_len = 256,
    text_heads = 8,
    num_visual_tokens = 512,
    visual_enc_depth = 6,
    visual_heads = 8,
    visual_image_size = 256,
    visual_patch_size = 32,
    channels = 3
):
    super().__init__()
    self.text_emb = nn.Embedding(num_text_tokens, dim_text)
    self.text_pos_emb = nn.Embedding(text_seq_len, dim_text)
    self.text_transformer = Encoder(dim = dim_text, depth = text_enc_depth, heads = text_heads)
    self.to_text_latent = nn.Linear(dim_text, dim_latent, bias = False)

    assert visual_image_size % visual_patch_size == 0, 'Image dimensions must be divisible by the patch size.'
    num_patches = (visual_image_size // visual_patch_size) ** 2
    patch_dim = channels * visual_patch_size ** 2

    self.visual_patch_size = visual_patch_size
    self.to_visual_embedding = nn.Linear(patch_dim, dim_image)
    self.visual_pos_emb = nn.Embedding(num_patches, dim_image)
    self.visual_transformer = Encoder(dim = dim_image, depth = visual_enc_depth, heads = visual_heads)
    self.to_visual_latent = nn.Linear(dim_image, dim_latent, bias = False)

    self.temperature = nn.Parameter(torch.tensor(1.))
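
# Neither snippet above shows the forward pass. Below is a minimal sketch of
# the contrastive step these modules are built for (CLIP-style: project both
# modalities to a shared latent, compare with a learned temperature). The
# mean-pooling-free setup and the symmetric cross-entropy are assumptions
# about the surrounding code, not taken from it.
import torch
import torch.nn.functional as F

def clip_loss(text_latents, image_latents, temperature):
    # normalize both latents so the dot product is a cosine similarity
    text_latents = F.normalize(text_latents, dim=-1)
    image_latents = F.normalize(image_latents, dim=-1)
    # pairwise similarities scaled by the learned (log-)temperature parameter
    sim = text_latents @ image_latents.t() * temperature.exp()
    # matching text/image pairs sit on the diagonal
    labels = torch.arange(sim.shape[0], device=sim.device)
    return (F.cross_entropy(sim, labels) + F.cross_entropy(sim.t(), labels)) / 2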
def __init__(self, word_vectors, char_vectors, hidden_size, drop_prob=0.):
    super(BiDAF, self).__init__()
    self.hidden_size = 2 * hidden_size  # doubled because word and char vectors are concatenated
    self.drop_prob = drop_prob  # must be set before it is used below
    self.emb = layers.Embedding(word_vectors=word_vectors,
                                char_vectors=char_vectors,
                                hidden_size=hidden_size,
                                drop_prob=drop_prob)
    self.enc = Encoder(dim=self.hidden_size,
                       depth=1,
                       heads=3,
                       ff_glu=True,
                       ff_dropout=self.drop_prob,
                       attn_dropout=self.drop_prob,
                       use_scalenorm=True,
                       position_infused_attn=True)
    self.att = layers.TBiDAFAttention(hidden_size=self.hidden_size, drop_prob=drop_prob)
    self.mod = Encoder(dim=2 * self.hidden_size,
                       depth=3,
                       heads=6,
                       ff_glu=True,
                       ff_dropout=self.drop_prob,
                       attn_dropout=self.drop_prob,
                       use_scalenorm=True,
                       position_infused_attn=True)
    self.out = layers.BiDAFOutput(hidden_size=self.hidden_size, drop_prob=self.drop_prob)
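
# The x-transformers Encoder above stands in for the usual BiDAF RNN encoder.
# A minimal sketch of how it is called on embedded, padded context tokens
# (the mask keyword is part of the x-transformers API; the tensor shapes and
# names here are illustrative, not from this code):
import torch
from x_transformers import Encoder

enc = Encoder(dim=200, depth=1, heads=4)
c_emb = torch.randn(2, 64, 200)     # (batch, context_len, 2 * hidden_size)
c_mask = torch.ones(2, 64).bool()   # True for real tokens, False for padding
c_enc = enc(c_emb, mask=c_mask)     # same shape as c_emb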
def __init__(self, num_classes=2, shuff_type="shuf", trans_dim=192):
    super(trans_shufflenet, self).__init__()
    self.trans_dim = trans_dim
    self.input_channel = 3
    self.num_output = num_classes
    param = {"class_num": num_classes, "channel_ratio": 1}
    self.upsampel = nn.UpsamplingBilinear2d(scale_factor=2)

    print(shuff_type)
    if shuff_type == "shuf":
        self.shufflenet = torchvision.models.shufflenet_v2_x1_0()
    elif shuff_type == "shuf_se":
        self.shufflenet = ShuffleNetV2SE(param)
    elif shuff_type == "shuf_k5_liteconv":
        self.shufflenet = ShuffleNetV2K5Lite(param)
    elif shuff_type == "shuf_liteconv":
        self.shufflenet = ShuffleNetV2LiteConv(param)
    elif shuff_type == "shuf_k5":
        self.shufflenet = ShuffleNetV2K5(param)
    elif shuff_type == "shuf_csp":
        self.shufflenet = ShuffleNetV2CSP(param)
    elif shuff_type == "shuf_sk":
        self.shufflenet = ShuffleNetV2SK(param)
    else:
        # the original chain fell through silently, leaving self.shufflenet unset
        raise ValueError(f"unknown shuff_type: {shuff_type}")

    self.up1 = UP(self.trans_dim, self.trans_dim // 2)
    # self.up2 = UP(self.trans_dim // 2, self.trans_dim // 4)
    # self.up3 = UP(self.trans_dim // 16, self.trans_dim // 64)
    # self.up4 = UP(self.trans_dim // 64, self.trans_dim // 256)

    self.Vit = ViT(
        dim=self.trans_dim,
        image_size=256,
        patch_size=32,
        num_classes=2,
        transformer=Encoder(
            dim=self.trans_dim,  # set to be the same as the wrapper
            depth=12,
            heads=8,
            ff_glu=True,         # feed-forward GLU variant, https://arxiv.org/abs/2002.05202
            residual_attn=True   # residual attention, https://arxiv.org/abs/2012.11747
        )
    )
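
# Construction-only example (the forward pass that combines the ShuffleNet
# backbone, the UP block, and the ViT head is not shown above); the accepted
# variant strings are the ones enumerated in the if/elif chain:
model = trans_shufflenet(num_classes=2, shuff_type="shuf_se", trans_dim=192)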
def __init__(self, hidden_size, drop_prob):
    super(BiDAFOutput, self).__init__()
    self.att_linear_1 = nn.Linear(2 * hidden_size, 1)
    self.mod_linear_1 = nn.Linear(2 * hidden_size, 1)
    # transformer Encoder replaces the original RNNEncoder:
    # self.rnn = RNNEncoder(input_size=2 * hidden_size, hidden_size=hidden_size,
    #                       num_layers=1, drop_prob=drop_prob)
    self.rnn = Encoder(dim=2 * hidden_size,
                       depth=1,
                       heads=8,
                       ff_glu=True,
                       ff_dropout=drop_prob,
                       attn_dropout=drop_prob,
                       use_scalenorm=True,
                       position_infused_attn=True)
    self.att_linear_2 = nn.Linear(2 * hidden_size, 1)
    self.mod_linear_2 = nn.Linear(2 * hidden_size, 1)
    self.hidden_size = hidden_size
def __init__(self, num_classes=2):
    super(stn_trans_shufflenet, self).__init__()
    self.input_channel = 3
    self.num_output = num_classes
    self.shufflenet = torchvision.models.shufflenet_v2_x1_0(
        pretrained=False, num_classes=num_classes)
    self.Vit = ViT(
        dim=192,
        image_size=args.img_size,
        patch_size=8,
        num_classes=2,
        transformer=Encoder(
            dim=192,             # set to be the same as the wrapper
            depth=12,
            heads=8,
            ff_glu=True,         # feed-forward GLU variant, https://arxiv.org/abs/2002.05202
            residual_attn=True   # residual attention, https://arxiv.org/abs/2012.11747
        ))

    # spatial transformer localization network
    self.localization = nn.Sequential(
        nn.Conv2d(3, 24, kernel_size=7),
        nn.MaxPool2d(2, stride=2),
        nn.ReLU(True),
        nn.Conv2d(24, 36, kernel_size=5),
        nn.MaxPool2d(2, stride=2),
        nn.ReLU(True))

    # regressor for the 3 * 2 affine matrix
    self.fc_loc = nn.Sequential(
        nn.Linear(36 * 10 * 10, 32),
        nn.ReLU(True),
        nn.Linear(32, 3 * 2))

    # initialize the weights/bias with the identity transformation
    self.fc_loc[2].weight.data.zero_()
    self.fc_loc[2].bias.data.copy_(
        torch.tensor([1, 0, 0, 0, 1, 0], dtype=torch.float))
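
# The STN forward step is not shown in this snippet. A typical application of
# the modules above, following the standard PyTorch STN tutorial pattern (a
# sketch, assuming an input size that yields a 36 x 10 x 10 localization map):
import torch.nn.functional as F

def stn(self, x):
    xs = self.localization(x)                # localization features
    xs = xs.view(-1, 36 * 10 * 10)           # flatten for the regressor
    theta = self.fc_loc(xs).view(-1, 2, 3)   # predicted affine parameters
    grid = F.affine_grid(theta, x.size(), align_corners=False)
    return F.grid_sample(x, grid, align_corners=False)  # warped input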
def __init__(self, model_train='tf_efficientnetv2_b0', num_classes=3,
             diffaug_activate=False, policy='color,translation', aug=None):
    super().__init__()
    #############################################
    if model_train in ('efficientnet-b0', 'efficientnet-b1', 'efficientnet-b2',
                       'efficientnet-b3', 'efficientnet-b4', 'efficientnet-b5',
                       'efficientnet-b6', 'efficientnet-b7'):
        self.netD = EfficientNet.from_pretrained(model_train, num_classes=num_classes)
    elif model_train == 'mobilenetv3_small':
        from arch.mobilenetv3_arch import MobileNetV3
        self.netD = MobileNetV3(n_class=num_classes, mode='small', input_size=256)
    elif model_train == 'mobilenetv3_large':
        from arch.mobilenetv3_arch import MobileNetV3
        self.netD = MobileNetV3(n_class=num_classes, mode='large', input_size=256)
    elif model_train == 'resnet50':
        from arch.resnet_arch import resnet50
        self.netD = resnet50(num_classes=num_classes, pretrain=cfg['pretrain'])
    elif model_train == 'resnet101':
        from arch.resnet_arch import resnet101
        self.netD = resnet101(num_classes=num_classes, pretrain=cfg['pretrain'])
    elif model_train == 'resnet152':
        from arch.resnet_arch import resnet152
        self.netD = resnet152(num_classes=num_classes, pretrain=cfg['pretrain'])
    #############################################
    elif model_train == 'ViT':
        from vit_pytorch import ViT
        self.netD = ViT(image_size=256, patch_size=32, num_classes=num_classes,
                        dim=1024, depth=6, heads=16, mlp_dim=2048,
                        dropout=0.1, emb_dropout=0.1)
    elif model_train == 'DeepViT':
        from vit_pytorch.deepvit import DeepViT
        self.netD = DeepViT(image_size=256, patch_size=32, num_classes=num_classes,
                            dim=1024, depth=6, heads=16, mlp_dim=2048,
                            dropout=0.1, emb_dropout=0.1)
    #############################################
    elif model_train in ('RepVGG-A0', 'RepVGG-A1', 'RepVGG-A2', 'RepVGG-B0',
                         'RepVGG-B1', 'RepVGG-B1g2', 'RepVGG-B1g4', 'RepVGG-B2',
                         'RepVGG-B2g2', 'RepVGG-B2g4', 'RepVGG-B3', 'RepVGG-B3g2',
                         'RepVGG-B3g4'):
        import arch.RepVGG_arch as RepVGG_arch
        # e.g. 'RepVGG-B1g2' -> create_RepVGG_B1g2
        create_fn = getattr(RepVGG_arch, 'create_' + model_train.replace('-', '_'))
        self.netD = create_fn(deploy=False, num_classes=num_classes)
    #############################################
    elif model_train == 'squeezenet_1_0':
        from arch.squeezenet_arch import SqueezeNet
        self.netD = SqueezeNet(num_classes=num_classes, version='1_0')
    elif model_train == 'squeezenet_1_1':
        from arch.squeezenet_arch import SqueezeNet
        self.netD = SqueezeNet(num_classes=num_classes, version='1_1')
    #############################################
    elif model_train in ('vgg11', 'vgg13', 'vgg16', 'vgg19'):
        import arch.vgg_arch as vgg_arch
        self.netD = getattr(vgg_arch, 'create_' + model_train)(num_classes, pretrained=cfg['pretrain'])
    #############################################
    elif model_train == 'SwinTransformer':
        from swin_transformer_pytorch import SwinTransformer
        self.netD = SwinTransformer(hidden_dim=96, layers=(2, 2, 6, 2),
                                    heads=(3, 6, 12, 24), channels=3,
                                    num_classes=num_classes, head_dim=32,
                                    window_size=8, downscaling_factors=(4, 2, 2, 2),
                                    relative_pos_embedding=True)
    elif model_train == 'effV2':
        import arch.efficientnetV2_arch as efficientnetV2_arch
        # cfg['size'] is one of 's', 'm', 'l', 'xl'
        self.netD = getattr(efficientnetV2_arch, 'effnetv2_' + cfg['size'])(num_classes=num_classes)
    elif model_train == 'x_transformers':
        from x_transformers import ViTransformerWrapper, Encoder
        self.netD = ViTransformerWrapper(
            image_size=cfg['image_size'],
            patch_size=cfg['patch_size'],
            num_classes=num_classes,
            attn_layers=Encoder(dim=cfg['dim'], depth=cfg['depth'], heads=cfg['heads']))
    elif model_train == 'mobilevit':
        import arch.mobilevit_arch as mobilevit_arch
        # note: the 'x' size selects mobilevit_s, as in the original mapping
        sizes = {'xxs': 'mobilevit_xxs', 'xs': 'mobilevit_xs', 'x': 'mobilevit_s'}
        self.netD = getattr(mobilevit_arch, sizes[cfg['model_size']])(num_classes=num_classes)
    elif model_train == 'hrt':
        from arch.hrt_arch import HighResolutionTransformer
        self.netD = HighResolutionTransformer(num_classes)
    elif model_train == 'volo':
        import arch.volo_arch as volo_arch
        # cfg['model_size'] is one of 'volo_d1' .. 'volo_d5'
        self.netD = getattr(volo_arch, cfg['model_size'])(pretrained=cfg['pretrain'], num_classes=num_classes)
    elif model_train == 'pvt_v2':
        import arch.pvt_v2_arch as pvt_v2_arch
        # cfg['model_size'] is one of 'pvt_v2_b0' .. 'pvt_v2_b5' or 'pvt_v2_b2_li'
        self.netD = getattr(pvt_v2_arch, cfg['model_size'])(pretrained=cfg['pretrain'], num_classes=num_classes)
    elif model_train == 'ConvMLP':
        import arch.ConvMLP_arch as ConvMLP_arch
        # cfg['model_size'] is one of 'convmlp_s', 'convmlp_m', 'convmlp_l'
        self.netD = getattr(ConvMLP_arch, cfg['model_size'])(pretrained=cfg['pretrain'], num_classes=num_classes)
    elif model_train == 'FocalTransformer':
        from arch.focal_transformer_arch import FocalTransformer
        self.netD = FocalTransformer(num_classes=num_classes)
    elif model_train == 'mobile_former':
        from arch.mobile_former_arch import MobileFormer, config_52, config_294, config_508
        configs = {'config_52': config_52, 'config_294': config_294, 'config_508': config_508}
        self.netD = MobileFormer(configs[cfg['model_size']])
    elif model_train == 'poolformer':
        import arch.poolformer_arch as poolformer_arch
        # cfg['model_size'] is one of 'poolformer_s12', 's24', 's36', 'm36', 'm48'
        self.netD = getattr(poolformer_arch, cfg['model_size'])(pretrained=True, num_classes=num_classes)
    elif model_train == 'timm':
        import timm
        # note: the config key is spelled 'model_choise' in the config file
        self.netD = timm.create_model(cfg['model_choise'], num_classes=num_classes, pretrained=True)

    # weights_init(self.netD, 'kaiming')  # only use this if there is no pretrain

    if aug == 'gridmix':
        from GridMixupLoss import GridMixupLoss
        self.criterion = GridMixupLoss(alpha=(0.4, 0.7), hole_aspect_ratio=1.,
                                       crop_area_ratio=(0.5, 1), crop_aspect_ratio=(0.5, 2),
                                       n_holes_x=(2, 6))
    elif aug == 'cutmix':
        from cutmix import cutmix
        self.criterion = cutmix(alpha=(0.4, 0.7), hole_aspect_ratio=1.,
                                crop_area_ratio=(0.5, 1), crop_aspect_ratio=(0.5, 2),
                                n_holes_x=(2, 6))
    self.aug = aug

    # note: this overwrites any criterion set by the aug branch above
    if cfg['loss'] == 'CenterLoss':
        from centerloss import CenterLoss
        self.criterion = CenterLoss(num_classes=num_classes, feat_dim=2, use_gpu=True)
    elif cfg['loss'] == 'normal':
        self.criterion = torch.nn.CrossEntropyLoss()

    self.accuracy = []
    self.losses = []
    self.diffaug_activate = diffaug_activate
    self.accuracy_val = []
    self.losses_val = []
    self.policy = policy
    self.iter_check = 0

    if cfg['aug'] == 'MuAugment':
        rand_augment = BatchRandAugment(N_TFMS=3, MAGN=3, mean=cfg['means'], std=cfg['std'])
        self.mu_transform = MuAugment(rand_augment, N_COMPS=4, N_SELECTED=2)
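
# Hedged usage sketch: the enclosing class name is not shown in this snippet
# (call it Classifier here), and cfg is assumed to be a module-level config
# dict providing at least 'loss' and 'aug'. The 'ViT' choice is used because
# it depends on no other cfg keys:
import torch

model = Classifier(model_train='ViT', num_classes=3, aug=None)
logits = model.netD(torch.randn(1, 3, 256, 256))  # shape (1, 3)
loss = model.criterion(logits, torch.tensor([1]))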