def __init__(self, num_classes, width_ratio=0.5, height_ratio=0.5):
    """Bag re-ID network on an SE-ResNeXt50 backbone.

    Two heads share the 2048-channel backbone output:
      * a global branch (avg-pool -> 1x1-conv reduction -> BN -> classifier)
      * a part branch (Bottleneck -> BatchDrop -> max-pool -> linear
        reduction -> BN -> classifier).

    Args:
        num_classes: number of identity classes for both softmax heads.
        width_ratio / height_ratio: fraction of the feature map erased by
            BatchDrop (passed as (height_ratio, width_ratio)).
    """
    super(BagReID_SE_RESNEXT, self).__init__()
    self.backbone = se_resnext50_32x4d(pretrained=True)

    # ----- global branch -----
    global_feats = cfg.MODEL.GLOBAL_FEATS
    self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.global_reduction = nn.Sequential(
        nn.Conv2d(2048, global_feats, 1),
        nn.BatchNorm2d(global_feats),
        nn.ReLU(True),
    )
    self.global_reduction.apply(weights_init_kaiming)
    self.global_bn = nn.BatchNorm1d(global_feats)
    self.global_softmax = nn.Linear(global_feats, num_classes)
    self.global_softmax.apply(weights_init_kaiming)

    # ----- part branch -----
    part_feats = cfg.MODEL.PART_FEATS
    self.part = Bottleneck(2048, 512)
    self.part_maxpool = nn.AdaptiveMaxPool2d((1, 1))
    self.batch_drop = BatchDrop(height_ratio, width_ratio)
    self.part_reduction = nn.Sequential(
        nn.Linear(2048, part_feats, True),
        nn.BatchNorm1d(part_feats),
        nn.ReLU(True),
    )
    self.part_reduction.apply(weights_init_kaiming)
    self.part_bn = nn.BatchNorm1d(part_feats)
    self.part_softmax = nn.Linear(part_feats, num_classes)
    self.part_softmax.apply(weights_init_kaiming)
def __init__(self, num_classes=3, num_filters=32, pretrained=True, is_deconv=True):
    """U-Net-style segmentation network with an SE-ResNeXt50 encoder.

    Args:
        num_classes: output channels of the final 1x1 conv.
        num_filters: base width of the decoder blocks.
        pretrained: if True, load 'imagenet' weights into the encoder.
        is_deconv: forwarded to every DecoderBlockV.
    """
    super().__init__()
    self.num_classes = num_classes
    weights = 'imagenet' if pretrained is True else None
    self.encoder = se_resnext50_32x4d(num_classes=1000, pretrained=weights)

    enc_out = 2048  # channels produced by the deepest encoder stage
    nf = num_filters

    # Encoder stages reused straight from the pretrained backbone.
    self.conv1 = self.encoder.layer0
    self.conv2 = self.encoder.layer1
    self.conv3 = self.encoder.layer2
    self.conv4 = self.encoder.layer3
    self.conv5 = self.encoder.layer4

    self.center = DecoderCenter(enc_out, nf * 8 * 2, nf * 8, False)

    # Each decoder block consumes the previous decoder output concatenated
    # with the matching encoder feature map (hence the `enc_out // k` terms).
    self.dec5 = DecoderBlockV(enc_out + nf * 8, nf * 8 * 2, nf * 2, is_deconv)
    self.dec4 = DecoderBlockV(enc_out // 2 + nf * 2, nf * 8, nf * 2, is_deconv)
    self.dec3 = DecoderBlockV(enc_out // 4 + nf * 2, nf * 4, nf * 2, is_deconv)
    self.dec2 = DecoderBlockV(enc_out // 8 + nf * 2, nf * 2, nf * 2, is_deconv)
    self.dec1 = DecoderBlockV(nf * 2, nf, nf * 2, is_deconv)

    # nf * 10 input — presumably a hypercolumn concat of five nf*2 decoder
    # outputs; confirm against the forward pass.
    self.dec0 = ConvRelu(nf * 10, nf * 2)
    self.final = nn.Conv2d(nf * 2, num_classes, kernel_size=1)
def __init__(self, model_cfg):
    """Two-stream vision + frozen-RoBERTa language model for
    text-to-vehicle retrieval.

    Builds twin image backbones (crop and background frame), a frozen
    RoBERTa text encoder, projection heads for "car" and "motion"
    sub-embeddings, and optional ID-classification heads.

    model_cfg fields read here: EMBED_DIM, IMG_ENCODER, BERT_NAME,
    NUM_CLASS, car_idloss, mo_idloss, share_idloss.

    Raises:
        ValueError: if model_cfg.IMG_ENCODER is not in supported_img_encoders.
    """
    super().__init__()
    self.model_cfg = model_cfg
    embed_dim = self.model_cfg.EMBED_DIM

    if self.model_cfg.IMG_ENCODER in supported_img_encoders:
        if self.model_cfg.IMG_ENCODER == "se_resnext50_32x4d":
            # Twin SE-ResNeXt50 backbones: one for the vehicle crop, one
            # for the background frame.
            self.vis_backbone = se_resnext50_32x4d()
            self.vis_backbone_bk = se_resnext50_32x4d()
            self.img_in_dim = 2048
            self.domian_vis_fc = nn.Conv2d(self.img_in_dim, embed_dim, kernel_size=1)
            self.domian_vis_fc_bk = nn.Conv2d(self.img_in_dim, embed_dim, kernel_size=1)
        else:
            self.vis_backbone = EfficientNet.from_pretrained(self.model_cfg.IMG_ENCODER)
            self.vis_backbone_bk = EfficientNet.from_pretrained(self.model_cfg.IMG_ENCODER)
            self.img_in_dim = self.vis_backbone.out_channels
            self.domian_vis_fc = nn.Linear(self.img_in_dim, embed_dim)
            self.domian_vis_fc_bk = nn.Linear(self.img_in_dim, embed_dim)
    else:
        # BUG FIX: this branch previously ran
        # `assert self.model_cfg.IMG_ENCODER in supported_img_encoders, ...`,
        # which is always False at this point and is silently stripped
        # under `python -O`. Fail loudly and unconditionally instead.
        raise ValueError(
            "unsupported img encoder: %s" % self.model_cfg.IMG_ENCODER)

    # Frozen RoBERTa text encoder (no gradients flow into it).
    self.bert_model = RobertaModel.from_pretrained(model_cfg.BERT_NAME)
    for p in self.bert_model.parameters():
        p.requires_grad = False

    # Learnable temperature for the retrieval logits.
    self.logit_scale = nn.Parameter(torch.ones(()), requires_grad=True)

    self.domian_vis_fc_merge = nn.Sequential(
        nn.Linear(embed_dim, embed_dim), nn.BatchNorm1d(embed_dim),
        nn.ReLU(), nn.Linear(embed_dim, embed_dim))

    # Car / motion projection heads. Vision heads normalize with
    # BatchNorm1d, language heads with LayerNorm.
    self.vis_car_fc = nn.Sequential(
        nn.BatchNorm1d(embed_dim), nn.ReLU(), nn.Linear(embed_dim, embed_dim // 2))
    self.lang_car_fc = nn.Sequential(
        nn.LayerNorm(embed_dim), nn.ReLU(), nn.Linear(embed_dim, embed_dim // 2))
    self.vis_motion_fc = nn.Sequential(
        nn.BatchNorm1d(embed_dim), nn.ReLU(), nn.Linear(embed_dim, embed_dim // 2))
    self.lang_motion_fc = nn.Sequential(
        nn.LayerNorm(embed_dim), nn.ReLU(), nn.Linear(embed_dim, embed_dim // 2))

    self.domian_lang_fc = nn.Sequential(
        nn.LayerNorm(embed_dim), nn.Linear(embed_dim, embed_dim),
        nn.ReLU(), nn.Linear(embed_dim, embed_dim))

    # Optional auxiliary ID-classification heads, toggled by config.
    if self.model_cfg.car_idloss:
        self.id_cls = nn.Sequential(
            nn.Linear(embed_dim, embed_dim), nn.BatchNorm1d(embed_dim),
            nn.ReLU(), nn.Linear(embed_dim, self.model_cfg.NUM_CLASS))
    if self.model_cfg.mo_idloss:
        self.id_cls2 = nn.Sequential(
            nn.Linear(embed_dim, embed_dim), nn.BatchNorm1d(embed_dim),
            nn.ReLU(), nn.Linear(embed_dim, self.model_cfg.NUM_CLASS))
    if self.model_cfg.share_idloss:
        self.id_cls3 = nn.Sequential(
            nn.Linear(embed_dim, embed_dim), nn.BatchNorm1d(embed_dim),
            nn.ReLU(), nn.Linear(embed_dim, self.model_cfg.NUM_CLASS))
def __init__(self, num_classes=1):
    """U-Net-style binary segmentation head on a pretrained SE-ResNeXt50.

    Args:
        num_classes: stored on the instance; NOTE(review): the final conv
            always emits 1 channel regardless — confirm intended.
    """
    super().__init__()
    self.is_deconv = False
    self.num_classes = num_classes
    self.pool = nn.MaxPool2d(2, 2)
    self.encoder = se_resnext50_32x4d(num_classes=1000, pretrained='imagenet')
    self.relu = nn.ReLU(inplace=True)

    # Stem: conv + BN + ReLU taken from the encoder's layer0.
    self.conv1 = nn.Sequential(
        self.encoder.layer0.conv1,
        self.encoder.layer0.bn1,
        self.encoder.layer0.relu1,
    )
    self.conv2 = self.encoder.layer1
    self.conv3 = self.encoder.layer2
    self.conv4 = self.encoder.layer3
    self.conv5 = self.encoder.layer4

    enc_out = 2048  # channels from the deepest encoder stage
    self.center = nn.Sequential(
        Conv3BN(2048, 512, bn=True),
        Conv3BN(512, 256, bn=True),
        self.pool,
    )
    # Decoder blocks concatenate the previous output with the matching
    # encoder feature map (enc_out // k terms).
    self.dec5 = DecoderBlockV3(enc_out + 256, 512, 64)
    self.dec4 = DecoderBlockV3(enc_out // 2 + 64, 256, 64)
    self.dec3 = DecoderBlockV3(enc_out // 4 + 64, 128, 64)
    self.dec2 = DecoderBlockV3(enc_out // 8 + 64, 64, 64)
    self.dec1 = DecoderBlockV3(64, 32, 64)

    # 320 = presumably five 64-channel decoder outputs concatenated;
    # confirm against the forward pass.
    self.final = nn.Sequential(
        nn.Conv2d(320, 64, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(64, 1, kernel_size=1, padding=0),
    )
def __init__(self, type, num_classes):
    """Classifier head wrapped around a pretrained SENet-family backbone.

    Args:
        type: one of 'seresnext50', 'seresnext101', 'seresnet50',
            'seresnet101', 'seresnet152', 'senet154'. (Parameter name kept
            for caller compatibility even though it shadows the builtin.)
        num_classes: output dimension of the final linear layer.

    Raises:
        Exception: if `type` names an unsupported model.
    """
    super().__init__()
    # Dispatch table replaces the original 6-way elif chain; every branch
    # did the same thing with a different constructor.
    constructors = {
        "seresnext50": se_resnext50_32x4d,
        "seresnext101": se_resnext101_32x4d,
        "seresnet50": se_resnet50,
        "seresnet101": se_resnet101,
        "seresnet152": se_resnet152,
        "senet154": senet154,
    }
    if type not in constructors:
        # BUG FIX: original message was missing the closing single quote
        # ("... type: '{}").
        raise Exception("Unsupported senet model type: '{}'".format(type))
    self.senet = constructors[type](pretrained="imagenet")
    self.layer0 = self.senet.layer0

    # Replicate a 1-channel input to the 3 channels the backbone expects.
    self.expand_channels = ExpandChannels2d(3)
    self.bn = nn.BatchNorm2d(3)
    self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)
    self.dropout = nn.Dropout(0.2)
    self.last_linear = nn.Linear(2048, num_classes)
def load_model(model_name='resnet50', resume='Best', start_epoch=0, cn=3,
               save_dir='saved_models/', width=32, start=8, cls_number=10,
               avg_number=1, gpus=[0, 1, 2, 3, 4, 5, 6, 7], kfold=1,
               model_times=0, train=True):
    """Build a classifier by name, adapt its stem to `cn` input channels,
    optionally resume/average saved checkpoints, and move it to GPU.

    Args:
        model_name: architecture key (resnet/densenet/inception/se*/dpn/...).
        resume: 'Best' to pick checkpoint(s) by the score embedded in the
            filename (between '[' and ']'), '' to start fresh.
        start_epoch: overridden from the resumed filename when resuming.
        cn: number of input channels for the first conv layer.
        save_dir, width, start, cls_number, kfold, model_times: parts of the
            checkpoint filename prefix.
        avg_number: >1 averages the weights of the top-N checkpoints.
        gpus: device ids for DataParallel (read-only; mutable default is
            never mutated here).
        train: when False and averaging, stale checkpoints are deleted.

    Returns:
        (model, start_epoch) tuple.

    BUG FIXES vs. original: Python-2 `print` statements (`print cur_resume`,
    `print 'cuda_avail: True'`) were SyntaxErrors under Python 3 (which the
    file's bare `super().__init__()` calls already require) — converted to
    print() calls. An unreachable `... and 0` imagenet-preload branch (which
    also used the py2-only `dict.items()[1:-2]` slice) and its write-only
    `load_model` flag were removed.
    """
    load_dict = None  # pretrained flag passed to every constructor

    # ---- construct the requested architecture ----
    if model_name == 'resnet50':
        model = resnet50(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'resnet101':
        model = resnet101(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'resnet152':
        model = resnet152(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'densenet161':
        model = densenet161(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'xception':
        model = xception(num_classes=cls_number, pretrained=load_dict)
        model.conv1 = nn.Conv2d(cn, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    elif model_name == 'inception_v3':
        model = inception_v3(num_classes=cls_number, pretrained=load_dict)
        model.Conv2d_1a_3x3.conv = nn.Conv2d(cn, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    elif model_name == 'seinception_v3':
        model = se_inception_v3(num_classes=cls_number)
        model.model.Conv2d_1a_3x3.conv = nn.Conv2d(cn, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    elif model_name == 'inception_v4':
        model = inceptionv4(num_classes=cls_number, pretrained=load_dict)
        model.features[0].conv = nn.Conv2d(cn, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    elif model_name == 'inceptionresnetv2':
        model = inceptionresnetv2(num_classes=cls_number, pretrained=load_dict)
        model.conv2d_1a.conv = nn.Conv2d(cn, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    elif model_name == 'seresnet50':
        model = se_resnet50(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'seresnet101':
        model = se_resnet101(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'seresnet152':
        model = se_resnet152(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'seresnext50':
        model = se_resnext50_32x4d(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'seresnext101':
        model = se_resnext101_32x4d(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'resnet50-101':
        model = SimpleNet()
    elif model_name == 'seresnet20':
        model = se_resnet20(num_classes=cls_number)
    elif model_name == 'seresnet32':
        model = se_resnet32(num_classes=cls_number)
    elif model_name == 'seresnet18':
        model = se_resnet18(num_classes=cls_number)
    elif model_name == 'seresnet34':
        model = se_resnet34(num_classes=cls_number)
    elif model_name == 'senet154':
        model = senet154(num_classes=cls_number, pretrained=load_dict)
        model.layer0.conv1 = nn.Conv2d(cn, 64, kernel_size=(3, 3), stride=(2, 2),
                                       padding=(1, 1), bias=False)
    elif model_name == 'nasnet':
        model = nasnetalarge(num_classes=cls_number, pretrained=load_dict)
        model.conv0.conv = nn.Conv2d(cn, 96, kernel_size=(3, 3), stride=(2, 2), bias=False)
    elif model_name == 'dpn98':
        model = dpn98(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'dpn107':
        model = dpn107(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'dpn92':
        model = dpn92(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'polynet':
        model = polynet(num_classes=cls_number, pretrained=load_dict)
        model.stem.conv1[0].conv = nn.Conv2d(cn, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    elif model_name == 'pnasnet':
        model = pnasnet5large(num_classes=cls_number, pretrained=load_dict)
        model.conv_0.conv = nn.Conv2d(cn, 96, kernel_size=(3, 3), stride=(2, 2), bias=False)

    # ---- patch the stem conv of families not handled above to cn channels ----
    if '-' not in model_name and load_dict != True:
        if model_name in ['dpn98']:
            model.features.conv1_1.conv = nn.Conv2d(cn, 96, kernel_size=(7, 7), stride=(2, 2),
                                                    padding=(3, 3), bias=False)
        elif model_name in ['dpn92']:
            model.features.conv1_1.conv = nn.Conv2d(cn, 64, kernel_size=(7, 7), stride=(2, 2),
                                                    padding=(3, 3), bias=False)
        elif model_name in ['seresnet20', 'seresnet32']:
            model.conv1 = nn.Conv2d(cn, 16, kernel_size=3, stride=1, padding=1, bias=False)
        elif model_name in ['seresnet18', 'seresnet34']:
            model.conv1 = nn.Conv2d(cn, 64, kernel_size=7, stride=2, padding=3, bias=False)
        elif 'seresnext' in model_name:
            model.layer0.conv1 = nn.Conv2d(cn, 64, kernel_size=7, stride=2, padding=3, bias=False)
        elif 'seresnet' in model_name:
            model.layer0.conv1 = nn.Conv2d(cn, 64, kernel_size=7, stride=2, padding=3, bias=False)
        elif 'resnet' in model_name:
            model.conv1 = nn.Conv2d(cn, 64, kernel_size=7, stride=2, padding=3, bias=False)
        elif 'densenet' in model_name:
            model.features.conv0 = nn.Conv2d(cn, 96, kernel_size=7, stride=2, padding=3, bias=False)
            model.avgpool = torch.nn.AdaptiveAvgPool2d(output_size=1)
        else:
            pass

    # ---- checkpoint selection / weight averaging ----
    model_ = model_name + '_' + str(width) + '_' + str(start) + '_' + str(cn)
    if kfold > 1:
        model_prefix = save_dir + str(model_times) + '_' + model_
    else:
        model_prefix = save_dir + model_

    if resume == 'Best' and avg_number >= 1:
        weight_path = glob(model_prefix + '*pth')
        # Rank checkpoints by the score embedded between '[' and ']' in the
        # filename, best first.
        cur_index = np.argsort(-np.array(
            [float(cur_p.split('/')[-1].split('[')[-1].split(']')[0])
             for cur_p in weight_path]))
        new_state_dict = OrderedDict()
        if len(weight_path) == 0:
            resume = ''
        elif avg_number == 1:
            # NOTE(review): picks weight_path[0], not the best-ranked
            # weight_path[cur_index[0]] — kept as-is; confirm intent.
            resume = weight_path[0]
        else:
            # Average the top `avg_number` checkpoints key-by-key.
            for cnt, index in zip(range(avg_number), cur_index[:avg_number]):
                cur_resume = weight_path[index]
                print(cur_resume)
                model.load_state_dict(torch.load(cur_resume))
                for k, v in model.state_dict().items():
                    if cnt == 0:
                        new_state_dict[k] = v
                    else:
                        new_state_dict[k] = new_state_dict[k] + v
                    if cnt == avg_number - 1:
                        new_state_dict[k] = new_state_dict[k] / float(avg_number)
            model.load_state_dict(new_state_dict)
            if train == False:
                # Inference run: prune checkpoints beyond the kept window.
                for index in cur_index[avg_number + 2:]:
                    cur_resume = weight_path[index]
                    print('remove resume %s ' % cur_resume)
                    os.remove(cur_resume)

    if resume != '' and avg_number == 1:
        # Epoch number is the third-from-last '-'-separated filename field.
        start_epoch = int(resume.split('-')[-3])
        logging.info('resuming finetune from %s' % resume)
        model.load_state_dict(torch.load(resume))
        print('start-epoch : ', start_epoch)

    cuda_avail = torch.cuda.is_available()
    if cuda_avail:
        print('cuda_avail: True')
        if len(gpus) > 1:
            model = torch.nn.DataParallel(model, device_ids=gpus).cuda()
        else:
            model = model.cuda()
    return model, start_epoch
'DenseNet161': lambda: E_densenet(densenet161(pretrained=True)), 'DenseNet169': lambda: E_densenet(densenet169(pretrained=True)), 'DenseNet201': lambda: E_densenet(densenet201(pretrained=True)), 'SENet154': lambda: E_senet(senet154(pretrained="imagenet")), 'SE_ResNet50': lambda: E_senet(se_resnet50(pretrained="imagenet")), 'SE_ResNet101': lambda: E_senet(se_resnet101(pretrained="imagenet")), 'SE_ResNet152': lambda: E_senet(se_resnet152(pretrained="imagenet")), 'SE_ResNext50_32x4d': lambda: E_senet(se_resnext50_32x4d(pretrained="imagenet")), 'SE_ResNext101_32x4d': lambda: E_senet(se_resnext101_32x4d(pretrained="imagenet")) } def get_models(args): backbone = args.backbone if os.getenv('TORCH_MODEL_ZOO') != args.pretrained_dir: os.environ['TORCH_MODEL_ZOO'] = args.pretrained_dir else: pass return __models_small__[backbone]()
def __init__(self, num_classes=1):
    """SE-ResNeXt50 U-Net with deep supervision.

    Exposes a per-pixel head (fuse_pixel -> logit_pixel), a per-image head
    (fuse_image -> logit_image) and a fused final `logit` conv.

    Args:
        num_classes: stored on the instance; NOTE(review): the heads all
            emit 1 channel/unit regardless — confirm intended.
    """
    super().__init__()
    self.is_deconv = False
    self.num_classes = num_classes
    self.pool = nn.MaxPool2d(2, 2)
    self.encoder = se_resnext50_32x4d(num_classes=1000, pretrained='imagenet')
    self.relu = nn.ReLU(inplace=True)

    # Stem: conv + BN + ReLU from the encoder's layer0.
    self.conv1 = nn.Sequential(
        self.encoder.layer0.conv1,
        self.encoder.layer0.bn1,
        self.encoder.layer0.relu1,
    )
    self.conv2 = self.encoder.layer1
    self.conv3 = self.encoder.layer2
    self.conv4 = self.encoder.layer3
    self.conv5 = self.encoder.layer4

    enc_out = 2048  # channels from the deepest encoder stage
    self.center = nn.Sequential(
        Conv3BN(2048, 512, bn=True),
        Conv3BN(512, 256, bn=True),
        self.pool,
    )
    self.dec5 = DecoderBlockV3(enc_out + 256, 512, 64)
    self.dec4 = DecoderBlockV3(enc_out // 2 + 64, 256, 64)
    self.dec3 = DecoderBlockV3(enc_out // 4 + 64, 128, 64)
    self.dec2 = DecoderBlockV3(enc_out // 8 + 64, 64, 64)
    self.dec1 = DecoderBlockV3(64, 32, 64)

    # Pixel-level head: 320 = presumably five 64-channel decoder outputs
    # concatenated; confirm against the forward pass.
    self.fuse_pixel = nn.Sequential(
        nn.Conv2d(320, 64, kernel_size=3, padding=1),
        nn.ReLU(inplace=True),
    )
    self.logit_pixel = nn.Conv2d(64, 1, kernel_size=1, padding=0)

    # Image-level head over the encoder's 2048-d global feature.
    self.fuse_image = nn.Sequential(nn.Linear(2048, 128), nn.ReLU(inplace=True))
    self.logit_image = nn.Linear(128, 1)

    # Fused final prediction.
    self.logit = nn.Conv2d(64, 1, kernel_size=1, padding=0)