コード例 #1
0
ファイル: network.py プロジェクト: huanggan1996/mvb_reid
    def __init__(self, num_classes, width_ratio=0.5, height_ratio=0.5):
        """Build the SE-ResNeXt-50 bag re-ID network.

        Args:
            num_classes: number of identity classes for the softmax heads.
            width_ratio: fraction of feature-map width zeroed by BatchDrop.
            height_ratio: fraction of feature-map height zeroed by BatchDrop.
        """
        super(BagReID_SE_RESNEXT, self).__init__()

        self.backbone = se_resnext50_32x4d(pretrained=True)

        global_feats = cfg.MODEL.GLOBAL_FEATS
        part_feats = cfg.MODEL.PART_FEATS

        # ----- global branch: pooled backbone feature -> reduced embedding -----
        self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.global_reduction = nn.Sequential(
            nn.Conv2d(2048, global_feats, 1),
            nn.BatchNorm2d(global_feats),
            nn.ReLU(True),
        )
        self.global_reduction.apply(weights_init_kaiming)
        self.global_bn = nn.BatchNorm1d(global_feats)
        self.global_softmax = nn.Linear(global_feats, num_classes)
        self.global_softmax.apply(weights_init_kaiming)

        # ----- part branch: extra bottleneck + batch-drop regularization -----
        self.part = Bottleneck(2048, 512)
        self.part_maxpool = nn.AdaptiveMaxPool2d((1, 1))
        self.batch_drop = BatchDrop(height_ratio, width_ratio)
        self.part_reduction = nn.Sequential(
            nn.Linear(2048, part_feats, True),
            nn.BatchNorm1d(part_feats),
            nn.ReLU(True),
        )
        self.part_reduction.apply(weights_init_kaiming)
        self.part_bn = nn.BatchNorm1d(part_feats)
        self.part_softmax = nn.Linear(part_feats, num_classes)
        self.part_softmax.apply(weights_init_kaiming)
コード例 #2
0
    def __init__(self, num_classes=3, num_filters=32,
                 pretrained=True, is_deconv=True):
        """SE-ResNeXt-50 encoder with a V-style decoder head.

        Args:
            num_classes: output channels of the final 1x1 convolution.
            num_filters: base channel width of the decoder blocks.
            pretrained: only the literal ``True`` loads ImageNet weights.
            is_deconv: use transposed convolutions inside the decoder blocks.
        """
        super().__init__()
        self.num_classes = num_classes

        # Identity check is deliberate: any value other than the literal True
        # (even a truthy string) leaves the encoder randomly initialised.
        if pretrained is True:
            weights = 'imagenet'
        else:
            weights = None
        self.encoder = se_resnext50_32x4d(num_classes=1000, pretrained=weights)

        bottom = 2048  # channel count produced by encoder.layer4

        # Encoder stages reused verbatim from the backbone.
        self.conv1 = self.encoder.layer0
        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv5 = self.encoder.layer4

        self.center = DecoderCenter(bottom, num_filters * 8 * 2,
                                    num_filters * 8, False)

        # Decoder: each stage consumes the matching encoder skip connection.
        self.dec5 = DecoderBlockV(bottom + num_filters * 8,
                                  num_filters * 8 * 2, num_filters * 2,
                                  is_deconv)
        self.dec4 = DecoderBlockV(bottom // 2 + num_filters * 2,
                                  num_filters * 8, num_filters * 2, is_deconv)
        self.dec3 = DecoderBlockV(bottom // 4 + num_filters * 2,
                                  num_filters * 4, num_filters * 2, is_deconv)
        self.dec2 = DecoderBlockV(bottom // 8 + num_filters * 2,
                                  num_filters * 2, num_filters * 2, is_deconv)
        self.dec1 = DecoderBlockV(num_filters * 2, num_filters,
                                  num_filters * 2, is_deconv)
        self.dec0 = ConvRelu(num_filters * 10, num_filters * 2)
        self.final = nn.Conv2d(num_filters * 2, num_classes, kernel_size=1)
コード例 #3
0
    def __init__(self, model_cfg):
        """Dual-stream (crop + background) vision/language retrieval model.

        Args:
            model_cfg: config namespace providing EMBED_DIM, IMG_ENCODER,
                BERT_NAME, NUM_CLASS and the ``*_idloss`` switches.

        Raises:
            AssertionError: if ``model_cfg.IMG_ENCODER`` is unsupported.
        """
        super().__init__()
        self.model_cfg = model_cfg
        embed_dim = self.model_cfg.EMBED_DIM

        # Validate up front.  The original code reached a guaranteed-failing
        # `assert` only via a dead else-branch; keeping the check first makes
        # the guard obvious.  (Still an assert to preserve the exception type
        # callers may expect; note asserts are stripped under `python -O`.)
        assert self.model_cfg.IMG_ENCODER in supported_img_encoders, \
            "unsupported img encoder"

        if self.model_cfg.IMG_ENCODER == "se_resnext50_32x4d":
            self.vis_backbone = se_resnext50_32x4d()
            self.vis_backbone_bk = se_resnext50_32x4d()
            self.img_in_dim = 2048
            # "domian" typo kept: attribute names are state_dict keys and
            # part of the public interface.
            self.domian_vis_fc = nn.Conv2d(self.img_in_dim, embed_dim,
                                           kernel_size=1)
            self.domian_vis_fc_bk = nn.Conv2d(self.img_in_dim, embed_dim,
                                              kernel_size=1)
        else:
            self.vis_backbone = EfficientNet.from_pretrained(
                self.model_cfg.IMG_ENCODER)
            self.vis_backbone_bk = EfficientNet.from_pretrained(
                self.model_cfg.IMG_ENCODER)
            self.img_in_dim = self.vis_backbone.out_channels
            self.domian_vis_fc = nn.Linear(self.img_in_dim, embed_dim)
            self.domian_vis_fc_bk = nn.Linear(self.img_in_dim, embed_dim)

        # Frozen RoBERTa text encoder (no gradients flow into it).
        self.bert_model = RobertaModel.from_pretrained(model_cfg.BERT_NAME)
        for p in self.bert_model.parameters():
            p.requires_grad = False

        # Learnable scalar temperature for the similarity logits.
        self.logit_scale = nn.Parameter(torch.ones(()), requires_grad=True)

        self.domian_vis_fc_merge = nn.Sequential(
            nn.Linear(embed_dim, embed_dim), nn.BatchNorm1d(embed_dim),
            nn.ReLU(), nn.Linear(embed_dim, embed_dim))
        # Visual heads use BatchNorm1d, language heads LayerNorm.
        self.vis_car_fc = nn.Sequential(
            nn.BatchNorm1d(embed_dim), nn.ReLU(),
            nn.Linear(embed_dim, embed_dim // 2))
        self.lang_car_fc = nn.Sequential(
            nn.LayerNorm(embed_dim), nn.ReLU(),
            nn.Linear(embed_dim, embed_dim // 2))
        self.vis_motion_fc = nn.Sequential(
            nn.BatchNorm1d(embed_dim), nn.ReLU(),
            nn.Linear(embed_dim, embed_dim // 2))
        self.lang_motion_fc = nn.Sequential(
            nn.LayerNorm(embed_dim), nn.ReLU(),
            nn.Linear(embed_dim, embed_dim // 2))

        self.domian_lang_fc = nn.Sequential(
            nn.LayerNorm(embed_dim), nn.Linear(embed_dim, embed_dim),
            nn.ReLU(), nn.Linear(embed_dim, embed_dim))

        # Optional auxiliary identity-classification heads.
        if self.model_cfg.car_idloss:
            self.id_cls = nn.Sequential(
                nn.Linear(embed_dim, embed_dim), nn.BatchNorm1d(embed_dim),
                nn.ReLU(), nn.Linear(embed_dim, self.model_cfg.NUM_CLASS))
        if self.model_cfg.mo_idloss:
            self.id_cls2 = nn.Sequential(
                nn.Linear(embed_dim, embed_dim), nn.BatchNorm1d(embed_dim),
                nn.ReLU(), nn.Linear(embed_dim, self.model_cfg.NUM_CLASS))
        if self.model_cfg.share_idloss:
            self.id_cls3 = nn.Sequential(
                nn.Linear(embed_dim, embed_dim), nn.BatchNorm1d(embed_dim),
                nn.ReLU(), nn.Linear(embed_dim, self.model_cfg.NUM_CLASS))
コード例 #4
0
    def __init__(self, num_classes=1):
        """U-Net-style segmentation head on a pretrained SE-ResNeXt-50.

        Args:
            num_classes: stored for API compatibility; the final conv is
                hard-coded to a single output channel.
        """
        super().__init__()
        # Decoder uses upsample+conv rather than transposed convolutions.
        # (The original also bound an unused local `is_deconv`; removed.)
        self.is_deconv = False
        self.num_classes = num_classes

        self.pool = nn.MaxPool2d(2, 2)

        self.encoder = se_resnext50_32x4d(num_classes=1000,
                                          pretrained='imagenet')

        self.relu = nn.ReLU(inplace=True)

        # Stem without the encoder's max-pool so conv1 keeps full resolution.
        self.conv1 = nn.Sequential(
            self.encoder.layer0.conv1,
            self.encoder.layer0.bn1,
            self.encoder.layer0.relu1,
        )

        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv5 = self.encoder.layer4

        bottom_channel_nr = 2048  # channels produced by encoder.layer4

        self.center = nn.Sequential(Conv3BN(2048, 512, bn=True),
                                    Conv3BN(512, 256, bn=True), self.pool)

        # Each decoder stage takes the matching encoder skip concatenated in.
        self.dec5 = DecoderBlockV3(bottom_channel_nr + 256, 512, 64)
        self.dec4 = DecoderBlockV3(bottom_channel_nr // 2 + 64, 256, 64)
        self.dec3 = DecoderBlockV3(bottom_channel_nr // 4 + 64, 128, 64)
        self.dec2 = DecoderBlockV3(bottom_channel_nr // 8 + 64, 64, 64)
        self.dec1 = DecoderBlockV3(64, 32, 64)

        # 320 = 5 decoder stages x 64 channels — presumably concatenated as a
        # hypercolumn in forward(); confirm against the forward pass.
        self.final = nn.Sequential(
            nn.Conv2d(320, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 1, kernel_size=1, padding=0),
        )
コード例 #5
0
    def __init__(self, type, num_classes):
        """Classifier head on top of a pretrained SE-Net family backbone.

        Args:
            type: backbone identifier (e.g. "seresnext50").  The parameter
                name shadows the builtin but is kept for caller compatibility.
            num_classes: output dimension of the final linear layer.

        Raises:
            Exception: if ``type`` is not a recognised backbone name.
        """
        super().__init__()

        # Dispatch table replaces the if/elif chain: every branch did the
        # same wiring, and all these backbones expose a `layer0` stem.
        constructors = {
            "seresnext50": se_resnext50_32x4d,
            "seresnext101": se_resnext101_32x4d,
            "seresnet50": se_resnet50,
            "seresnet101": se_resnet101,
            "seresnet152": se_resnet152,
            "senet154": senet154,
        }
        try:
            ctor = constructors[type]
        except KeyError:
            # Original message was missing the closing quote after '{}'.
            raise Exception("Unsupported senet model type: '{}'".format(type))
        self.senet = ctor(pretrained="imagenet")
        self.layer0 = self.senet.layer0

        # Presumably expands 1-channel input to the 3 channels the ImageNet
        # weights expect — confirm against ExpandChannels2d's definition.
        self.expand_channels = ExpandChannels2d(3)
        self.bn = nn.BatchNorm2d(3)

        self.avg_pool = nn.AdaptiveAvgPool2d(output_size=1)
        self.dropout = nn.Dropout(0.2)
        self.last_linear = nn.Linear(2048, num_classes)
コード例 #6
0
def load_model(model_name='resnet50', resume='Best', start_epoch=0, cn=3,
               save_dir='saved_models/', width=32, start=8, cls_number=10,
               avg_number=1, gpus=None, kfold=1, model_times=0, train=True):
    """Build a backbone by name, adapt its stem to ``cn`` input channels,
    and optionally resume from (or average) saved checkpoints.

    Fixes applied: the original mixed Python 2 and Python 3 syntax
    (``print 'x'`` statements and slicing of ``dict.items()``), which is a
    hard SyntaxError on Python 3; the mutable default ``gpus=[...]`` is now
    a ``None`` sentinel; dead locals shadowing the function name removed.

    Args:
        model_name: key selecting the architecture to build.
        resume: 'Best' picks the top-scoring checkpoint(s) under the prefix,
            '' starts fresh, otherwise an explicit checkpoint path.
        start_epoch: initial epoch; overwritten when resuming from a path
            whose name encodes the epoch as the third-from-last '-' field.
        cn: number of input channels for the stem convolution.
        save_dir: directory containing saved '*.pth' checkpoints.
        width, start: tags baked into the checkpoint filename prefix.
        cls_number: number of output classes.
        avg_number: when > 1, average the weights of the top-N checkpoints.
        gpus: device ids for DataParallel (defaults to 0-7).
        kfold: when > 1, prefix the checkpoint name with ``model_times``.
        model_times: fold index used when kfold > 1.
        train: when False, delete stale checkpoints beyond the averaged set.

    Returns:
        ``(model, start_epoch)``; the model is moved to GPU when available.
    """
    if gpus is None:
        gpus = [0, 1, 2, 3, 4, 5, 6, 7]

    load_dict = None
    # load_dict = True if cn == 3 else None

    if model_name == 'resnet50':
        model = resnet50(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'resnet101':
        model = resnet101(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'resnet152':
        model = resnet152(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'densenet161':
        model = densenet161(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'xception':
        model = xception(num_classes=cls_number, pretrained=load_dict)
        model.conv1 = nn.Conv2d(cn, 32, kernel_size=(3, 3), stride=(2, 2),
                                bias=False)
    elif model_name == 'inception_v3':
        model = inception_v3(num_classes=cls_number, pretrained=load_dict)
        model.Conv2d_1a_3x3.conv = nn.Conv2d(cn, 32, kernel_size=(3, 3),
                                             stride=(2, 2), bias=False)
    elif model_name == 'seinception_v3':
        model = se_inception_v3(num_classes=cls_number)
        model.model.Conv2d_1a_3x3.conv = nn.Conv2d(cn, 32, kernel_size=(3, 3),
                                                   stride=(2, 2), bias=False)
    elif model_name == 'inception_v4':
        model = inceptionv4(num_classes=cls_number, pretrained=load_dict)
        model.features[0].conv = nn.Conv2d(cn, 32, kernel_size=(3, 3),
                                           stride=(2, 2), bias=False)
    elif model_name == 'inceptionresnetv2':
        model = inceptionresnetv2(num_classes=cls_number, pretrained=load_dict)
        model.conv2d_1a.conv = nn.Conv2d(cn, 32, kernel_size=(3, 3),
                                         stride=(2, 2), bias=False)
    elif model_name == 'seresnet50':
        model = se_resnet50(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'seresnet101':
        model = se_resnet101(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'seresnet152':
        model = se_resnet152(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'seresnext50':
        model = se_resnext50_32x4d(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'seresnext101':
        model = se_resnext101_32x4d(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'resnet50-101':
        model = SimpleNet()
    elif model_name == 'seresnet20':
        model = se_resnet20(num_classes=cls_number)
    elif model_name == 'seresnet32':
        model = se_resnet32(num_classes=cls_number)
    elif model_name == 'seresnet18':
        model = se_resnet18(num_classes=cls_number)
    elif model_name == 'seresnet34':
        model = se_resnet34(num_classes=cls_number)
    elif model_name == 'senet154':
        model = senet154(num_classes=cls_number, pretrained=load_dict)
        model.layer0.conv1 = nn.Conv2d(cn, 64, kernel_size=(3, 3),
                                       stride=(2, 2), padding=(1, 1),
                                       bias=False)
    elif model_name == 'nasnet':
        model = nasnetalarge(num_classes=cls_number, pretrained=load_dict)
        model.conv0.conv = nn.Conv2d(cn, 96, kernel_size=(3, 3),
                                     stride=(2, 2), bias=False)
    elif model_name == 'dpn98':
        model = dpn98(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'dpn107':
        model = dpn107(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'dpn92':
        model = dpn92(num_classes=cls_number, pretrained=load_dict)
    elif model_name == 'polynet':
        model = polynet(num_classes=cls_number, pretrained=load_dict)
        model.stem.conv1[0].conv = nn.Conv2d(cn, 32, kernel_size=(3, 3),
                                             stride=(2, 2), bias=False)
    elif model_name == 'pnasnet':
        model = pnasnet5large(num_classes=cls_number, pretrained=load_dict)
        model.conv_0.conv = nn.Conv2d(cn, 96, kernel_size=(3, 3),
                                      stride=(2, 2), bias=False)

    # Re-wire the stem for `cn` input channels and force a size-agnostic
    # average pool (skipped for composite models and pretrained weights).
    if '-' not in model_name and load_dict != True:
        if model_name in ['dpn98', ]:
            model.features.conv1_1.conv = nn.Conv2d(
                cn, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3),
                bias=False)
        elif model_name in ['dpn92', ]:
            model.features.conv1_1.conv = nn.Conv2d(
                cn, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3),
                bias=False)
        elif model_name in ['seresnet20', 'seresnet32']:
            model.conv1 = nn.Conv2d(cn, 16, kernel_size=3, stride=1,
                                    padding=1, bias=False)
        elif model_name in ['seresnet18', 'seresnet34']:
            model.conv1 = nn.Conv2d(cn, 64, kernel_size=7, stride=2,
                                    padding=3, bias=False)
        elif 'seresnext' in model_name:
            model.layer0.conv1 = nn.Conv2d(cn, 64, kernel_size=7, stride=2,
                                           padding=3, bias=False)
        elif 'seresnet' in model_name:
            model.layer0.conv1 = nn.Conv2d(cn, 64, kernel_size=7, stride=2,
                                           padding=3, bias=False)
        elif 'resnet' in model_name:
            model.conv1 = nn.Conv2d(cn, 64, kernel_size=7, stride=2,
                                    padding=3, bias=False)
        elif 'densenet' in model_name:
            model.features.conv0 = nn.Conv2d(cn, 96, kernel_size=7, stride=2,
                                             padding=3, bias=False)

        model.avgpool = torch.nn.AdaptiveAvgPool2d(output_size=1)

    # Disabled block ('and 0'): partial ImageNet weight transfer for resnet50.
    if load_dict != True and model_name == 'resnet50' and 0:
        base_model = resnet50(pretrained=True)
        model_dict = model.state_dict()
        new_state_dict = OrderedDict()
        # list(...) is required on Python 3, where items() is a view.
        for k, v in list(base_model.state_dict().items())[1:-2]:
            new_state_dict[k] = v
        model_dict.update(new_state_dict)
        model.load_state_dict(model_dict)
        print('load imagenet')

    model_ = model_name + '_' + \
        str(width) + '_' + str(start) + '_' + str(cn)
    if kfold > 1:
        model_prefix = save_dir + str(model_times) + '_' + model_
    else:
        model_prefix = save_dir + model_

    if resume == 'Best' and avg_number >= 1:
        weight_path = glob(model_prefix + '*pth')
        # Scores are embedded in the filename as '[score]'; sort descending.
        cur_index = np.argsort(-np.array(
            [float(cur_p.split('/')[-1].split('[')[-1].split(']')[0])
             for cur_p in weight_path]))
        new_state_dict = OrderedDict()
        if len(weight_path) == 0:
            resume = ''
        elif avg_number == 1:
            # NOTE(review): picks weight_path[0], not the top-scored
            # weight_path[cur_index[0]] — confirm this is intended.
            resume = weight_path[0]
        else:
            # Average the parameters of the top `avg_number` checkpoints.
            for cnt, index in zip(range(avg_number), cur_index[:avg_number]):
                cur_resume = weight_path[index]
                print(cur_resume)
                model.load_state_dict(torch.load(cur_resume))
                for k, v in model.state_dict().items():
                    if cnt == 0:
                        new_state_dict[k] = v
                    else:
                        new_state_dict[k] = new_state_dict[k] + v
                    if cnt == avg_number - 1:
                        new_state_dict[k] = new_state_dict[k] / float(avg_number)
            model.load_state_dict(new_state_dict)
        if train == False:
            # Prune checkpoints beyond the averaged set (keeps two spares).
            for index in cur_index[avg_number + 2:]:
                cur_resume = weight_path[index]
                print('remove resume %s ' % cur_resume)
                os.remove(cur_resume)
    if resume != '' and avg_number == 1:
        start_epoch = int(resume.split('-')[-3])
        logging.info('resuming finetune from %s' % resume)
        model.load_state_dict(torch.load(resume))

    print('start-epoch : ', start_epoch)

    cuda_avail = torch.cuda.is_available()
    if cuda_avail:
        print('cuda_avail: True')
        if len(gpus) > 1:
            model = torch.nn.DataParallel(model, device_ids=gpus).cuda()
        else:
            model = model.cuda()
    return model, start_epoch
コード例 #7
0
ファイル: __init__.py プロジェクト: michael-fonder/ST-CLSTM
    'DenseNet161':
    lambda: E_densenet(densenet161(pretrained=True)),
    'DenseNet169':
    lambda: E_densenet(densenet169(pretrained=True)),
    'DenseNet201':
    lambda: E_densenet(densenet201(pretrained=True)),
    'SENet154':
    lambda: E_senet(senet154(pretrained="imagenet")),
    'SE_ResNet50':
    lambda: E_senet(se_resnet50(pretrained="imagenet")),
    'SE_ResNet101':
    lambda: E_senet(se_resnet101(pretrained="imagenet")),
    'SE_ResNet152':
    lambda: E_senet(se_resnet152(pretrained="imagenet")),
    'SE_ResNext50_32x4d':
    lambda: E_senet(se_resnext50_32x4d(pretrained="imagenet")),
    'SE_ResNext101_32x4d':
    lambda: E_senet(se_resnext101_32x4d(pretrained="imagenet"))
}


def get_models(args):
    """Instantiate the backbone factory named by ``args.backbone``.

    Points TORCH_MODEL_ZOO at ``args.pretrained_dir`` so pretrained weights
    are cached/loaded from there, then calls the registered factory.

    Args:
        args: namespace with ``backbone`` (key into __models_small__) and
            ``pretrained_dir`` (weight cache directory).

    Returns:
        The constructed encoder model.
    """
    # The original guarded the assignment with an if/else whose else was
    # `pass`; setting unconditionally is equivalent and simpler.
    os.environ['TORCH_MODEL_ZOO'] = args.pretrained_dir

    return __models_small__[args.backbone]()
コード例 #8
0
    def __init__(self, num_classes=1):
        """Hypercolumn U-Net on SE-ResNeXt-50 with an auxiliary image head.

        Args:
            num_classes: stored for API compatibility; both the pixel and
                image logits are hard-coded to a single output.
        """
        super().__init__()
        # Decoder uses upsample+conv rather than transposed convolutions.
        # (The original also bound an unused local `is_deconv`; removed,
        # along with several slabs of commented-out dead code.)
        self.is_deconv = False
        self.num_classes = num_classes

        self.pool = nn.MaxPool2d(2, 2)

        self.encoder = se_resnext50_32x4d(num_classes=1000,
                                          pretrained='imagenet')

        self.relu = nn.ReLU(inplace=True)

        # Stem without the encoder's max-pool so conv1 keeps full resolution.
        self.conv1 = nn.Sequential(
            self.encoder.layer0.conv1,
            self.encoder.layer0.bn1,
            self.encoder.layer0.relu1,
        )

        self.conv2 = self.encoder.layer1
        self.conv3 = self.encoder.layer2
        self.conv4 = self.encoder.layer3
        self.conv5 = self.encoder.layer4

        bottom_channel_nr = 2048  # channels produced by encoder.layer4

        self.center = nn.Sequential(Conv3BN(2048, 512, bn=True),
                                    Conv3BN(512, 256, bn=True), self.pool)

        # Each decoder stage takes the matching encoder skip concatenated in.
        self.dec5 = DecoderBlockV3(bottom_channel_nr + 256, 512, 64)
        self.dec4 = DecoderBlockV3(bottom_channel_nr // 2 + 64, 256, 64)
        self.dec3 = DecoderBlockV3(bottom_channel_nr // 4 + 64, 128, 64)
        self.dec2 = DecoderBlockV3(bottom_channel_nr // 8 + 64, 64, 64)
        self.dec1 = DecoderBlockV3(64, 32, 64)

        # Pixel branch: fuse the stacked decoder outputs (320 = 5 stages x
        # 64 channels — presumably concatenated in forward(); confirm), then
        # project to a single-channel mask logit.
        self.fuse_pixel = nn.Sequential(
            nn.Conv2d(320, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
        )
        self.logit_pixel = nn.Conv2d(64, 1, kernel_size=1, padding=0)

        # Image branch: 2048-d global feature -> single presence logit.
        self.fuse_image = nn.Sequential(nn.Linear(2048, 128),
                                        nn.ReLU(inplace=True))
        self.logit_image = nn.Linear(128, 1)

        # Final fused logit over the combined 64-channel pixel features.
        self.logit = nn.Conv2d(64, 1, kernel_size=1, padding=0)