def __init__(self, num_classes=2): super(STPN, self).__init__() # CNNs self.cnn1 = bninception(pretrained='imagenet') self.cnn2 = bninception(pretrained='imagenet') self.cnn3 = bninception(pretrained='imagenet') self.cnn4 = bninception(pretrained='imagenet') self.cnn1 = self.cnn1.cuda() self.cnn2 = self.cnn2.cuda() self.cnn3 = self.cnn3.cuda() self.cnn4 = self.cnn4.cuda() # Spatial Stream self.avgPool1 = torch.nn.AvgPool2d((7, 7)) # Temporal Stream self.avgPool2 = torch.nn.AvgPool2d((7, 7)) # Attention stream # STCB layers self.stcb1 = STCB3.CompactBilinearPooling(input_dim1=1024, input_dim2=1024, input_dim3=1024, output_dim=1024) self.stcb1.cuda() self.stcb1.train() self.stcb2 = STCB2.CompactBilinearPooling(input_dim1=1024, input_dim2=1024, output_dim=2048) self.stcb2.cuda() self.stcb2.train() # Convolutional layers self.conv1 = torch.nn.Conv2d(2048, 64, (1, 1)).cuda() self.conv2 = torch.nn.Conv2d(64, 1, (1, 1)).cuda() self.sm = torch.nn.Softmax2d().cuda() # Weighted Pooling Layer self.wtPool = torch.nn.AvgPool2d((7, 7)) # Intersection of streams self.stcb3 = STCB3.CompactBilinearPooling(input_dim1=1024, input_dim2=1024, input_dim3=1024, output_dim=4096) self.stcb3.cuda() self.stcb3.train() self.fc = torch.nn.Linear(4096, num_classes) self.lrelu = torch.nn.LeakyReLU() self.dropout = torch.nn.Dropout(0.5) self.tanh = torch.nn.Tanh()
def _build_reconstruct_layers():
    """Return the layer list for the image-reconstruction decoder.

    Upsamples 1024-channel backbone features back to the full configured
    image size in five bilinear-upsample stages, ending in a Sigmoid so the
    output is in [0, 1] (suitable for an MSE reconstruction loss).
    """
    def _size(divisor):
        # Target spatial size at 1/divisor of the configured resolution.
        return [int(config.img_height / divisor), int(config.img_width / divisor)]

    layers = [nn.BatchNorm2d(1024)]
    in_channels = 1024
    # Four identical upsample -> 3x3 conv -> BN -> ReLU stages
    # (1/16 -> 1/8 -> 1/4 -> 1/2 resolution); only the first stage's
    # input channel count differs (1024 vs 32).
    for divisor in (16, 8, 4, 2):
        layers += [
            nn.UpsamplingBilinear2d(_size(divisor)),
            nn.Conv2d(in_channels, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(32, affine=True),
            nn.ReLU(),
        ]
        in_channels = 32
    # Final stage restores full resolution and maps to the output channels.
    layers += [
        nn.UpsamplingBilinear2d([config.img_height, config.img_width]),
        nn.Conv2d(32, config.out_channels, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        nn.Sigmoid(),
    ]
    return layers


def get_bninception():
    """Build a BN-Inception classifier configured from the global `config`.

    Replaces the fixed global pooling with adaptive pooling, rebuilds the
    stem conv for `config.in_channels` inputs (its weights are freshly
    initialised, not copied from the pretrained stem), and installs a
    BatchNorm/Dropout/Linear head sized for `config.num_classes`.

    When `config.with_mse_loss` is set, a reconstruction decoder is attached
    as `model.reconstruct_layer` (previously five copy-pasted stages; now
    built by `_build_reconstruct_layers`).

    :return: the configured model.
    """
    model = bninception(pretrained="imagenet")
    model.global_pool = nn.AdaptiveAvgPool2d(1)
    model.conv1_7x7_s2 = nn.Conv2d(config.in_channels, 64, kernel_size=(7, 7),
                                   stride=(2, 2), padding=(3, 3))
    model.last_linear = nn.Sequential(
        nn.BatchNorm1d(1024),
        nn.Dropout(0.5),
        nn.Linear(1024, config.num_classes),
    )
    if config.with_mse_loss:
        model.reconstruct_layer = nn.Sequential(*_build_reconstruct_layers())
    return model
def get_net(model_name, num_classes, drop_rate, channels):
    """Build a named backbone with a custom input stem and classifier head.

    :param model_name: 'bninception' or 'resnet101'.
    :param num_classes: size of the final Linear layer.
    :param drop_rate: dropout probability in the head.
    :param channels: number of input channels for the rebuilt stem conv
        (the new conv's weights are freshly initialised).
    :return: the configured model.
    :raises ValueError: if model_name is not recognised.
    """
    if model_name == 'bninception':
        model = bninception(pretrained="imagenet")
        model.global_pool = nn.AdaptiveAvgPool2d(1)
        model.conv1_7x7_s2 = nn.Conv2d(channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        model.last_linear = nn.Sequential(nn.BatchNorm1d(1024), nn.Dropout(drop_rate), nn.Linear(1024, num_classes))
    elif model_name == 'resnet101':
        model = resnet101(pretrained="imagenet")
        model.avgpool = nn.AdaptiveAvgPool2d(1)
        model.conv1 = nn.Conv2d(channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        model.last_linear = nn.Sequential(nn.BatchNorm1d(2048), nn.Dropout(drop_rate), nn.Linear(2048, num_classes))
    else:
        # BUG FIX: was `assert False and "Invalid model"` — `False and ...`
        # never attaches the message, and asserts are stripped under
        # `python -O`, silently returning an unbound `model`. Raise a real,
        # descriptive exception instead.
        raise ValueError("Invalid model: %r" % (model_name,))
    return model
def get_net_channel3():
    """Return an ImageNet-pretrained BN-Inception for standard 3-channel input."""
    net = bninception(pretrained="imagenet")
    # Adaptive pooling lets the backbone accept arbitrary spatial sizes.
    net.global_pool = nn.AdaptiveAvgPool2d(1)
    # Swap the ImageNet head for a BN/Dropout/Linear classifier sized
    # for config.num_classes.
    head = [
        nn.BatchNorm1d(1024),
        nn.Dropout(0.5),
        nn.Linear(1024, config.num_classes),
    ]
    net.last_linear = nn.Sequential(*head)
    return net
def get_net():
    """BN-Inception adapted to config.channels inputs and config.num_classes outputs."""
    net = bninception(pretrained="imagenet")
    net.global_pool = nn.AdaptiveAvgPool2d(1)
    # Rebuild the stem so it accepts config.channels input planes
    # (the replacement conv is freshly initialised).
    net.conv1_7x7_s2 = nn.Conv2d(config.channels, 64, kernel_size=(7, 7),
                                 stride=(2, 2), padding=(3, 3))
    classifier = (
        nn.BatchNorm1d(1024),
        nn.Dropout(0.5),
        nn.Linear(1024, config.num_classes),
    )
    net.last_linear = nn.Sequential(*classifier)
    return net
def InceptionV1():
    """Inception V1 (GoogLeNet) with BatchNormalization, adapted via `cfg`."""
    net = bninception(pretrained="imagenet")
    net.global_pool = nn.AdaptiveAvgPool2d(1)
    # Fresh stem conv so the network accepts cfg.channels input planes.
    net.conv1_7x7_s2 = nn.Conv2d(cfg.channels, 64, kernel_size=(7, 7),
                                 stride=(2, 2), padding=(3, 3))
    net.last_linear = nn.Sequential(
        nn.BatchNorm1d(1024),
        nn.Dropout(0.5),
        nn.Linear(1024, cfg.num_classes),
    )
    return net
def get_bninception_model():
    """BN-Inception backbone with config-driven input channels and class count."""
    model = bninception(pretrained="imagenet")
    model.global_pool = nn.AdaptiveAvgPool2d(output_size=1)
    # Stem conv rebuilt for config.channels inputs (weights re-initialised).
    stem = nn.Conv2d(in_channels=config.channels, out_channels=64,
                     kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    model.conv1_7x7_s2 = stem
    classifier = nn.Sequential(
        nn.BatchNorm1d(num_features=1024),
        nn.Dropout(p=0.5),
        nn.Linear(in_features=1024, out_features=config.num_classes),
    )
    model.last_linear = classifier
    return model
def build_inception(pretrained, num_classes):
    """Build a BN-Inception classifier.

    :param pretrained: whether to load ImageNet-pretrained weights.
    :param num_classes: number of output classes.
    :return: the configured model.
    """
    # BUG FIX: `pretrained` was previously ignored — the backbone always
    # loaded ImageNet weights regardless of the argument. Honour it now.
    model = bninception(pretrained='imagenet' if pretrained else None)
    model.global_pool = nn.AdaptiveAvgPool2d(1)
    # Rebuilt 3-channel stem conv; its weights are freshly initialised.
    model.conv1_7x7_s2 = nn.Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    model.last_linear = nn.Sequential(
        nn.BatchNorm1d(1024),
        nn.Dropout(0.5),
        nn.Linear(1024, num_classes),
    )
    return model
def __init__(self, num_classes):
    """Wrap a pretrained BN-Inception for 4-channel input and `num_classes` outputs."""
    super().__init__()
    self.inception = bninception(pretrained="imagenet")
    # Build a 4-channel stem and copy the pretrained RGB filters into the
    # first three input channels; the fourth channel keeps the new conv's
    # default initialisation.
    stem = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    stem.weight.data[:, 0:3, :, :] = self.inception.conv1_7x7_s2.weight.data
    self.inception.conv1_7x7_s2 = stem
    self.inception.global_pool = nn.AdaptiveAvgPool2d(1)
    # Replace the ImageNet head with a BN/Dropout/Linear classifier.
    self.inception.last_linear = nn.Sequential(
        nn.BatchNorm1d(1024),
        nn.Dropout(0.5),
        nn.Linear(1024, num_classes),
    )
def get_bn_inception():
    """BN-Inception with a 4-channel stem and a fixed 28-way head.

    The fourth input filter is initialised as the per-filter mean of the
    existing three channels, so the pretrained response is approximately
    preserved.
    """
    model = bninception()
    rgb_weights = model.conv1_7x7_s2.weight
    # Mean over the channel axis gives one extra input filter per output filter.
    extra_channel = torch.mean(rgb_weights, dim=1).unsqueeze(1)
    stem = nn.Conv2d(4, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    stem.weight = torch.nn.Parameter(
        torch.cat((rgb_weights, extra_channel), dim=1))
    model.conv1_7x7_s2 = stem
    model.global_pool = nn.AvgPool2d(16, stride=1, padding=0,
                                     ceil_mode=True, count_include_pad=True)
    model.last_linear = nn.Sequential(nn.Dropout(), nn.Linear(1024, 28))
    return model
def get_net(model_name=None, config=None, img_channels=3):
    """Construct a backbone by name and retarget its head to config.num_classes.

    :param model_name: backbone identifier; anything after the first "-" is
        stripped (e.g. "resnet50-fold1" -> "resnet50").
    :param config: options object; only config.num_classes is read here.
    :param img_channels: input channel count; several branches rebuild the
        stem conv when it differs from 3.
    :return: the configured model.
    :raises ValueError: when model_name matches no known backbone.
    """
    if model_name:
        # Strip any suffix after the first dash (fold/run tags).
        model_name = model_name.split("-")[0]
    if model_name == "bninception":
        print("train %s" % model_name)
        model = bninception(pretrained="imagenet")
        # bnin_dict = model.state_dict()
        # para = bnin_dict['conv1_7x7_s2.weight'].cpu().data.numpy()
        # para = np.concatenate((para, para[:, :1, :, :]), axis=1)
        # model.conv1_7x7_s2 = nn.Conv2d(config.channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        # model.load_state_dict(bnin_dict)
        model.global_pool = nn.AdaptiveAvgPool2d(1)
        model.last_linear = nn.Sequential(
            nn.BatchNorm1d(1024),
            nn.Dropout(0.5),
            nn.Linear(1024, config.num_classes),
        )
    elif model_name == "resnet50":
        print("train %s" % model_name)
        model = resnet50(pretrained="imagenet")
        # res_dict = model.state_dict()
        # para = res_dict['conv1.weight'].data.numpy()
        # para = np.concatenate((para,para[:, :1, :, :]), axis=1)
        # res_dict['conv1.weight'] = torch.tensor(para)
        # model.conv1 = nn.Conv2d(config.channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        # model.load_state_dict(res_dict)
        model.avgpool = nn.AdaptiveAvgPool2d(1)
        model.last_linear = nn.Sequential(
            # nn.BatchNorm1d(2048),
            # nn.Dropout(0.5),
            nn.Linear(2048, config.num_classes),
        )
    elif model_name == "octresnet50":
        print("train %s" % model_name)
        model = oct_resnet50(pretrained="imagenet")
        # res_dict = model.state_dict()
        # para = res_dict['conv1.weight'].data.numpy()
        # para = np.concatenate((para,para[:, :1, :, :]), axis=1)
        # res_dict['conv1.weight'] = torch.tensor(para)
        # model.conv1 = nn.Conv2d(config.channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        # model.load_state_dict(res_dict)
        # model.avgpool = nn.AdaptiveAvgPool2d(1)
        # Octave-ResNet keeps its head under `fc`, not `last_linear`.
        model.fc = nn.Sequential(
            # nn.BatchNorm1d(2048),
            # nn.Dropout(0.5),
            nn.Linear(2048, config.num_classes),
        )
    elif model_name == "seresnext5032x4d":
        print("train %s" % model_name)
        model = se_resnext50_32x4d(pretrained="imagenet")
        # if config.img_weight == 100:
        # print(config.channels)
        if img_channels != 3:
            print("%s for visit" % model_name)
            # model = se_resnext50_32x4d(input_3x3=True, pretrained=None)
            # Rebuild layer0 as a 3x 3x3-conv stem (input_3x3 style).
            # NOTE(review): the first conv hard-codes 7 input channels rather
            # than using img_channels — confirm the "visit" tensor always has
            # 7 channels.
            layer0_modules = [
                ('conv1', nn.Conv2d(7, 64, 3, stride=2, padding=1, bias=False)),
                ('bn1', nn.BatchNorm2d(64)),
                ('relu1', nn.ReLU(inplace=True)),
                ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)),
                ('bn2', nn.BatchNorm2d(64)),
                ('relu2', nn.ReLU(inplace=True)),
                ('conv3', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)),
                ('bn3', nn.BatchNorm2d(64)),
                ('relu3', nn.ReLU(inplace=True)),
                ('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True))
            ]
            from collections import OrderedDict
            model.layer0 = nn.Sequential(OrderedDict(layer0_modules))
            # model.layer0.conv1 = nn.Conv2d(img_channels, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
            # model.layer0 = nn.Sequential(
            #     nn.Conv2d(img_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)),
            #     nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            #     nn.ReLU(inplace=True),
            #     nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True),
            # )
        model.avg_pool = nn.AdaptiveAvgPool2d(1)
        model.last_linear = nn.Sequential(
            # nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            # nn.Dropout(0.5),
            # nn.Linear(in_features=2048, out_features=1024, bias=True),
            # nn.ReLU(inplace=True),
            # nn.BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            # nn.Dropout(0.2),
            nn.Linear(2048, config.num_classes, bias=True),
        )
    elif model_name == "seresnext101":
        print("train %s" % model_name)
        model = se_resnext101_32x4d(pretrained="imagenet")
        # if config.img_weight == 100:
        # print(config.channels)
        if img_channels != 3:
            # Rebuild layer0 with an img_channels-input 7x7 stem
            # (freshly initialised, pretrained stem weights discarded).
            model.layer0 = nn.Sequential(
                nn.Conv2d(img_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)),
                nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True),
            )
        model.avg_pool = nn.AdaptiveAvgPool2d(1)
        model.last_linear = nn.Sequential(
            nn.BatchNorm1d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            # nn.Dropout(0.5),
            # nn.Linear(in_features=2048, out_features=1024, bias=True),
            # nn.ReLU(inplace=True),
            # nn.BatchNorm1d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            # nn.Dropout(0.2),
            nn.Linear(2048, config.num_classes, bias=True),
        )
    elif model_name == "densenet169":
        print("train %s" % model_name)
        model = densenet169(pretrained="imagenet")
        # if config.img_weight == 100:
        # print(config.channels)
        # if img_channels != 3:
        #     model.layer0 = nn.Sequential(
        #         nn.Conv2d(img_channels, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)),
        #         nn.BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
        #         nn.ReLU(inplace=True),
        #         nn.MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True),
        #     )
        # DenseNet-169 produces 1664 features before the classifier.
        model.last_linear = nn.Sequential(
            nn.Linear(1664, config.num_classes, bias=True),
        )
    elif model_name == "resnet18":
        print("train %s" % model_name)
        model = resnet18(pretrained="imagenet")
        # model.conv1 = nn.Conv2d(config.channels, 64, kernel_size=(2, 2), stride=(1, 1), padding=(3, 3), bias=False)# model.avg_pool = nn.AdaptiveAvgPool2d(1)
        model.last_linear = nn.Sequential(
            # nn.BatchNorm1d(2048),
            # nn.Dropout(0.8),
            nn.Linear(512, config.num_classes),
        )
    elif model_name == "resnet56":
        print("train %s" % model_name)
        model = resnet56()  #pretrained="imagenet"
        # Load a locally trained checkpoint; keys may carry a DataParallel
        # "module." prefix and name the head "linear" instead of
        # "last_linear", so both are rewritten before loading.
        checkpoint = torch.load("./models/resnet56.th")
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in checkpoint["state_dict"].items():
            namekey = k[7:] if "module." in k else k  # remove `module.`
            if "linear" in namekey:
                namekey = namekey.replace("linear", "last_linear")
            # if "linear" in namekey:
            #     namekey = namekey.replace("linear", "last_linear")
            new_state_dict[namekey] = v
            # print(namekey, end="|")
        model.load_state_dict(new_state_dict)
        if img_channels != 3:
            # Rebuild the stem for non-RGB input (weights re-initialised).
            model.conv1 = nn.Conv2d(img_channels, 16, kernel_size=3, stride=1, padding=1, bias=False)
        model.avg_pool = nn.AdaptiveAvgPool2d(1)
        model.last_linear = nn.Sequential(
            # nn.BatchNorm1d(2048),
            # nn.Dropout(0.8),
            nn.Linear(64, config.num_classes),
        )
    elif model_name == "resnet110":
        print("train %s" % model_name)
        model = resnet110()  #pretrained="imagenet"
        # Same checkpoint-key surgery as the resnet56 branch.
        checkpoint = torch.load("./models/resnet110.th")
        from collections import OrderedDict
        new_state_dict = OrderedDict()
        for k, v in checkpoint["state_dict"].items():
            namekey = k[7:] if "module." in k else k  # remove `module.`
            if "linear" in namekey:
                namekey = namekey.replace("linear", "last_linear")
            # if "linear" in namekey:
            #     namekey = namekey.replace("linear", "last_linear")
            new_state_dict[namekey] = v
            # print(namekey, end="|")
        model.load_state_dict(new_state_dict)
        # NOTE(review): unlike resnet56, the stem is rebuilt unconditionally
        # here (even for img_channels == 3, discarding the loaded conv1
        # weights) — confirm this asymmetry is intended.
        model.conv1 = nn.Conv2d(img_channels, 16, kernel_size=3, stride=1, padding=1, bias=False)
        model.avg_pool = nn.AdaptiveAvgPool2d(1)
        model.last_linear = nn.Sequential(
            # nn.BatchNorm1d(64),
            # nn.Dropout(0.8),
            nn.Linear(64, config.num_classes),
        )
    elif model_name == "resnet200":
        print("train %s" % model_name)
        model = resnet200()  #pretrained="imagenet"
        model.conv1 = nn.Conv2d(img_channels, 16, kernel_size=3, stride=1, padding=1, bias=False)
        model.avg_pool = nn.AdaptiveAvgPool2d(1)
        model.last_linear = nn.Sequential(
            # nn.BatchNorm1d(2048),
            # nn.Dropout(0.8),
            nn.Linear(64, config.num_classes),
        )
    else:
        print("Didn't choose which model to be used!")
        raise ValueError
    return model
def _atlas_head(in_features):
    """Standard 28-way Atlas classification head (BN -> Dropout -> Linear)."""
    return nn.Sequential(
        nn.BatchNorm1d(in_features),
        nn.Dropout(0.5),
        nn.Linear(in_features, 28),
    )


def _four_channel_conv(old_conv, out_channels, copy_weights, **conv_kwargs):
    """Return a 4-input-channel replacement for `old_conv`.

    When `copy_weights` is true, the pretrained filters are copied into
    channels 0-2 and channel 1's (green) filters are duplicated into the
    new fourth channel; otherwise the conv keeps its default init.
    """
    nconv = nn.Conv2d(4, out_channels, **conv_kwargs)
    if copy_weights:
        nconv.weight.data[:, :3, :, :] = old_conv.weight.data.clone()
        nconv.weight.data[:, 3, :, :] = old_conv.weight.data[:, 1, :, :].clone()
    return nconv


def Atlas_Inception(model_name, pretrained=False, drop_rate=0., num_channels=4):
    """Build an Inception-family model with a 28-class head for the Atlas task.

    :param model_name: 'bninception'/'inceptionv2', 'inceptionresnetv2' or
        'inceptionv4'.
    :param pretrained: load ImageNet weights and copy them into the widened
        4-channel stem.
    :param drop_rate: accepted but currently unused — see NOTE below.
    :param num_channels: 3 (keep original stem) or 4 (widen the stem).
    :return: the configured model.
    :raises ValueError: for an unsupported model_name or num_channels.

    NOTE(review): drop_rate is ignored; every head hard-codes Dropout(0.5).
    Left as-is to preserve existing behaviour — confirm before wiring it in.
    """
    if num_channels not in (3, 4):
        raise ValueError('num_channels should be 3 or 4.')

    if model_name in ('bninception', 'inceptionv2'):
        print("Using BN Inception")
        if pretrained:
            print('Loading weights...')
        model = bninception(pretrained="imagenet" if pretrained else None)
        model.global_pool = nn.AdaptiveAvgPool2d(1)
        if num_channels == 4:
            model.conv1_7x7_s2 = _four_channel_conv(
                model.conv1_7x7_s2, 64, pretrained,
                kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
        model.last_linear = _atlas_head(1024)
    elif model_name == 'inceptionresnetv2':
        print("Using Inception Resnet v2")
        if pretrained:
            print('Loading weights...')
        model = inceptionresnetv2(pretrained="imagenet" if pretrained else None)
        model.avgpool_1a = nn.AdaptiveAvgPool2d(1)
        if num_channels == 4:
            model.conv2d_1a.conv = _four_channel_conv(
                model.conv2d_1a.conv, 32, pretrained,
                kernel_size=(3, 3), stride=(2, 2), bias=False)
        model.last_linear = _atlas_head(1536)
    elif model_name == 'inceptionv4':
        print("Using Inception v4")
        if pretrained:
            print('Loading weights...')
        model = inceptionv4(pretrained="imagenet" if pretrained else None)
        model.avg_pool = nn.AdaptiveAvgPool2d(1)
        if num_channels == 4:
            model.features[0].conv = _four_channel_conv(
                model.features[0].conv, 32, pretrained,
                kernel_size=(3, 3), stride=(2, 2), bias=False)
        model.last_linear = _atlas_head(1536)
    else:
        # BUG FIX: an unknown model_name previously fell through every `if`
        # and crashed at `return model` with UnboundLocalError.
        raise ValueError("Unsupported model_name: %r" % (model_name,))
    return model