def __init__(self, layers=18, classes=2, with_sp=True):
    """Build BiseNet with a ResNet-18/34 backbone for the context path.

    Args:
        layers: backbone depth, 18 or 34.
        classes: number of segmentation classes.
        with_sp: if True, build the SpatialPath branch.

    Raises:
        ValueError: if ``layers`` is not 18 or 34 (the original code fell
            through and hit a NameError on ``resnet`` instead).
    """
    super(BiseNet, self).__init__()
    self.with_sp = with_sp
    if layers == 18:
        resnet = models.resnet18(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    elif layers == 34:
        resnet = models.resnet34(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    else:
        raise ValueError('BiseNet supports layers in (18, 34), got {}'.format(layers))
    if self.with_sp:
        self.sp = SpatialPath(in_channels=3, out_channels=128)
    self.cp = ContextPath(in_channels=3, out_channels=128, backbone=resnet)
    self.ffm = FeatureFusionModule(in_channels=256, out_channels=256)  # concat: 128+128
    self.conv_out = BiseNetHead(in_channels=256, mid_channels=256, classes=classes)
    if self.training:
        # Auxiliary heads on the 1/16 and 1/32 features; built only when the
        # module is constructed in training mode.
        self.conv_out16 = BiseNetHead(in_channels=128, mid_channels=64, classes=classes)
        self.conv_out32 = BiseNetHead(in_channels=128, mid_channels=64, classes=classes)
def __init__(self, layers, in_channels=192, strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1),
             pretrained=False, deep_base=False) -> None:
    """ResNet fed with DCT coefficients, keeping stages 2-4 plus the head.

    The stem and layer1 are dropped; layer2's entry conv and its residual
    downsample projection are rebuilt to accept ``in_channels`` inputs.

    Args:
        layers: backbone depth, one of 18/34/50/101.
        in_channels: number of DCT input channels fed into layer2.
        strides: per-stage strides forwarded to the backbone builder.
        dilations: per-stage dilations forwarded to the backbone builder.
        pretrained: load pretrained backbone weights.
        deep_base: use the deep-stem backbone variant.

    Raises:
        ValueError: for an unsupported ``layers`` value (previously an
            unbound ``resnet`` caused a NameError).
    """
    super(ResNetDCT_345, self).__init__()
    if layers == 18:
        resnet = resnet18(pretrained, deep_base, strides=strides, dilations=dilations)
    elif layers == 34:
        resnet = resnet34(pretrained, deep_base, strides=strides, dilations=dilations)
    elif layers == 50:
        resnet = resnet50(pretrained, deep_base, strides=strides, dilations=dilations)
    elif layers == 101:
        resnet = resnet101(pretrained, deep_base, strides=strides, dilations=dilations)
    else:
        raise ValueError('unsupported layers: {}'.format(layers))
    self.layer2, self.layer3, self.layer4, self.avgpool, self.fc = \
        resnet.layer2, resnet.layer3, resnet.layer4, resnet.avgpool, resnet.fc
    self.relu = nn.ReLU(inplace=True)
    # Rebuild layer2's first conv so it accepts the DCT input width.
    out_ch = self.layer2[0].conv1.out_channels
    ks = self.layer2[0].conv1.kernel_size
    stride = self.layer2[0].conv1.stride
    padding = self.layer2[0].conv1.padding
    self.layer2[0].conv1 = nn.Conv2d(in_channels, out_ch, kernel_size=ks,
                                     stride=stride, padding=padding, bias=False)
    init_weight(self.layer2[0].conv1)
    # The residual downsample projection must match the new input width too.
    out_ch = self.layer2[0].downsample[0].out_channels
    self.layer2[0].downsample[0] = nn.Conv2d(in_channels, out_ch, kernel_size=1,
                                             stride=2, bias=False)
    init_weight(self.layer2[0].downsample[0])
def __init__(self, embedding_size, num_classes, backbone='resnet18', mode='t'):
    """Multi-task ResNet wrapper with a shared trunk and two task heads.

    Args:
        embedding_size: indexable of at least three embedding dims —
            [0] shared, [1] task 1, [2] task 2.
        num_classes: output classes for each task head.
        backbone: one of resnet18/34/50/101/152.
        mode: training mode tag stored on the instance.

    Raises:
        RuntimeError: for an unrecognized ``backbone`` name.
    """
    super(background_resnet, self).__init__()
    self.trainMode = mode
    self.backbone = backbone
    # copying modules from pretrained models
    builders = {
        'resnet18': resnet.resnet18,
        'resnet34': resnet.resnet34,
        'resnet50': resnet.resnet50,
        'resnet101': resnet.resnet101,
        'resnet152': resnet.resnet152,
    }
    if backbone not in builders:
        raise RuntimeError('unknown backbone: {}'.format(backbone))
    self.pretrained = builders[backbone](pretrained=False)
    # shared embedding layer
    self.fc0 = nn.Linear(128, embedding_size[0])
    # task specific layers for task 1
    self.fc1 = nn.Linear(128, embedding_size[1])
    self.bn1 = nn.BatchNorm1d(embedding_size[1])
    self.relu1 = nn.ReLU()
    self.last1 = nn.Linear(embedding_size[1], num_classes)
    # task specific layers for task 2
    self.fc2 = nn.Linear(128, embedding_size[2])
    self.bn2 = nn.BatchNorm1d(embedding_size[2])
    self.relu2 = nn.ReLU()
    self.last2 = nn.Linear(embedding_size[2], num_classes)
def __init__(self, model, modality='rgb', inp=3, num_classes=150, input_size=224,
             input_segments=8, dropout=0.5):
    """Temporal Segment Network wrapper around a 2D CNN backbone.

    Args:
        model: backbone name (resnet18/34/50/101 or bn_inception).
        modality: 'rgb' or 'flow'; 'flow' forces ``inp`` to 10 stacked channels.
        inp: number of input channels (overridden for flow).
        num_classes: classifier output size.
        input_size: expected spatial input size (stored by callers' convention).
        input_segments: number of temporal segments.
        dropout: dropout probability before the final FC.

    Raises:
        ValueError: for an unknown ``model`` name (previously the code fell
            through and crashed later with NameError on ``self.model``).
    """
    super(tsn_model, self).__init__()
    if modality == 'flow':
        inp = 10  # stacked optical-flow input overrides the given channel count
    self.num_classes = num_classes
    self.inp = inp
    self.input_segments = input_segments
    self._enable_pbn = False
    if model == 'resnet18':
        self.model = resnet.resnet18(inp=inp, pretrained=True)
    elif model == 'resnet34':
        self.model = resnet.resnet34(inp=inp, pretrained=True)
    elif model == 'resnet50':
        self.model = resnet.resnet50(inp=inp, pretrained=True)
    elif model == 'resnet101':
        self.model = resnet.resnet101(inp=inp, pretrained=True)
    elif model == 'bn_inception':
        self.model = bn_inception.bninception(inp=inp)
    else:
        raise ValueError('unknown model: {}'.format(model))
    self.modality = modality
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.dropout = nn.Dropout(p=dropout)
    in_channels = self.model.fc.in_features
    self.model.fc = None  # classification is handled by self.fc below
    self.fc = nn.Linear(in_channels, num_classes)
    self.consensus = basic_ops.ConsensusModule('avg')
def __init__(self, layers=18, dropout=0.1, classes=2):
    """FFTNet23: ResNet backbone + frequency-separation modules + seg heads.

    Args:
        layers: backbone depth, 18/34/50.
        dropout: dropout probability in the classification heads.
        classes: number of segmentation classes.

    Raises:
        ValueError: for an unsupported ``layers`` value (previously an
            unbound ``resnet`` caused a NameError).
    """
    super(FFTNet23, self).__init__()
    if layers == 18:
        resnet = models.resnet18(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    elif layers == 34:
        resnet = models.resnet34(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    elif layers == 50:
        resnet = models.resnet50_semseg(pretrained=True, deep_base=True,
                                        strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4))
    else:
        raise ValueError('FFTNet23 supports layers in (18, 34, 50), got {}'.format(layers))
    if layers == 18 or layers == 34:
        # basic-block backbones use a single 7x7 stem conv
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool)
    else:
        # deep-base backbone uses three stacked 3x3 stem convs
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                    resnet.conv2, resnet.bn2, resnet.relu,
                                    resnet.conv3, resnet.bn3, resnet.relu, resnet.maxpool)
    self.layer1, self.layer2, self.layer3, self.layer4 = \
        resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4
    if layers == 18 or layers == 34:
        fea_dim = 512
        aux_dim = 256
    else:
        fea_dim = 2048
        aux_dim = 1024
    self.freq = nn.ModuleList()
    for i in range(6, 10):
        # the number of in_channels is 2^i
        self.freq.append(
            FeatureFrequencySeparationModule(
                in_channels=2**i,
                up_channels=2**i if i == 6 else 2**(i - 1),
                smf_channels=128,
                high_ratio=1 - 0.2 * (i - 5),
                # high_ratio=0.5,
                low_ratio=0.2,
                up_flag=i != 6,           # deepest stage has nothing above it
                smf_flag=i % 2 == 0,
            ))
    self.fa_cls_seg = nn.Sequential(nn.Dropout2d(p=dropout),
                                    nn.Conv2d(256, classes, kernel_size=1))
    if self.training:
        # auxiliary head, built only when constructed in training mode
        self.aux = nn.Sequential(
            nn.Conv2d(aux_dim, aux_dim // 4, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(aux_dim // 4),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=dropout),
            nn.Conv2d(aux_dim // 4, classes, kernel_size=1))
def __init__(self, out_planes=1, ccm=True, norm_layer=nn.BatchNorm2d, is_training=True,
             expansion=2, base_channel=32):
    """CPFNet: ResNet-34 encoder with SAP block, GPG fusion and decoders.

    Args:
        out_planes: number of output planes of the main head.
        ccm: unused flag kept for interface compatibility.
        norm_layer: normalization layer class for the head.
        is_training: training-mode flag stored on the instance.
        expansion: channel expansion factor; with ``base_channel`` selects
            the channel layout.
        base_channel: base channel count of the backbone.

    Raises:
        ValueError: for an unsupported (expansion, base_channel) pair —
            the original code left ``expan``/``spatial_ch`` unbound and
            crashed later with NameError.
    """
    super(CPFNet, self).__init__()
    self.backbone = resnet34(pretrained=True)
    self.expansion = expansion
    self.base_channel = base_channel
    if self.expansion == 4 and self.base_channel == 64:
        expan = [512, 1024, 2048]
        spatial_ch = [128, 256]
    elif self.expansion == 4 and self.base_channel == 32:
        expan = [256, 512, 1024]
        spatial_ch = [32, 128]
        conv_channel_up = [256, 384, 512]
    elif self.expansion == 2 and self.base_channel == 32:
        expan = [128, 256, 512]
        spatial_ch = [64, 64]
        conv_channel_up = [128, 256, 512]
    else:
        raise ValueError('unsupported expansion/base_channel combo: {}/{}'.format(
            expansion, base_channel))
    conv_channel = expan[0]
    self.is_training = is_training
    self.sap = SAPblock(expan[-1])
    self.decoder5 = DecoderBlock(expan[-1], expan[-2], relu=False, last=True)  # 256
    self.decoder4 = DecoderBlock(expan[-2], expan[-3], relu=False)  # 128
    self.decoder3 = DecoderBlock(expan[-3], spatial_ch[-1], relu=False)  # 64
    self.decoder2 = DecoderBlock(spatial_ch[-1], spatial_ch[-2])  # 32
    self.mce_2 = GPG_2([spatial_ch[-1], expan[0], expan[1], expan[2]],
                       width=spatial_ch[-1], up_kwargs=up_kwargs)
    self.mce_3 = GPG_3([expan[0], expan[1], expan[2]], width=expan[0], up_kwargs=up_kwargs)
    self.mce_4 = GPG_4([expan[1], expan[2]], width=expan[1], up_kwargs=up_kwargs)
    self.main_head = BaseNetHead(spatial_ch[0], out_planes, 2,
                                 is_aux=False, norm_layer=norm_layer)
    self.relu = nn.ReLU()
def __init__(self, classNum, pretrained=True):
    """Multi-attribute quality-check model on a ResNet-34 trunk.

    Args:
        classNum: number of attribute outputs.
        pretrained: load pretrained weights for the backbone.
    """
    super(MA_quality_check_res34, self).__init__()
    self.base = resnet.resnet34(pretrained=pretrained)
    self.num_att = classNum
    self.classifier = nn.Linear(512, self.num_att)  # resnet34 final feature dim is 512
    # init.normal / init.constant are deprecated and removed in modern
    # PyTorch; use the in-place variants.
    init.normal_(self.classifier.weight, std=0.001)
    init.constant_(self.classifier.bias, 0)
def __init__(self, layers=50, dropout=0.1, classes=2, use_dct=True, use_bise=True,
             vec_dim=300):
    """DCTNet: ResNet backbone with optional DCT encoding and BiseNet-style fusion.

    Args:
        layers: backbone depth, one of 18/34/50/101.
        dropout: dropout probability in the classification heads.
        classes: number of segmentation classes (must be > 1).
        use_dct: build the DCT encoding module.
        use_bise: build the feature fusion module.
        vec_dim: dimensionality of the DCT vector.

    Raises:
        ValueError: for an unsupported ``layers`` or ``classes`` value.
            (Previously enforced with ``assert``, which is stripped under -O.)
    """
    super(DCTNet, self).__init__()
    if layers not in (18, 34, 50, 101):
        raise ValueError('layers must be one of 18/34/50/101, got {}'.format(layers))
    if classes <= 1:
        raise ValueError('classes must be > 1, got {}'.format(classes))
    self.use_dct = use_dct
    self.use_bise = use_bise
    self.vec_dim = vec_dim
    if layers == 18:
        resnet = models.resnet18(pretrained=False, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    elif layers == 34:
        resnet = models.resnet34(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    elif layers == 50:
        resnet = models.resnet50_semseg(pretrained=True, deep_base=True,
                                        strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4))
    else:
        resnet = models.resnet101_semseg(pretrained=True, deep_base=True,
                                         strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4))
    if layers == 18 or layers == 34:
        # basic-block backbones use a single 7x7 stem conv
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool)
    else:
        # deep-base backbone uses three stacked 3x3 stem convs
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                    resnet.conv2, resnet.bn2, resnet.relu,
                                    resnet.conv3, resnet.bn3, resnet.relu, resnet.maxpool)
    self.layer1, self.layer2, self.layer3, self.layer4 = \
        resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4
    if layers == 18 or layers == 34:
        fea_dim = 512
        aux_dim = 256
    else:
        fea_dim = 2048
        aux_dim = 1024
    down_dim = fea_dim // 4
    if use_dct:
        self.dct_encoding = DCTModule(vec_dim=self.vec_dim)
    if use_bise:
        self.ffm = FeatureFusionModule(
            in_channels=self.vec_dim + 128, out_channels=fea_dim)  # concat: 128+128
    self.cls = nn.Sequential(
        nn.Conv2d(fea_dim, down_dim, kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(down_dim),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=dropout),
        nn.Conv2d(down_dim, classes, kernel_size=1)
    )
    if self.training:
        # auxiliary head, built only when constructed in training mode
        self.aux = nn.Sequential(
            nn.Conv2d(aux_dim, aux_dim // 4, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm2d(aux_dim // 4),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=dropout),
            nn.Conv2d(aux_dim // 4, classes, kernel_size=1)
        )
def create_model(opt=None):
    """Instantiate the classifier named by ``opt.model``.

    Also sets ``opt.model_save_dir`` to the per-model weight directory.

    Args:
        opt: options object with at least ``model`` and ``n_classes``
            attributes; mutated in place (``model_save_dir``).

    Returns:
        Tuple ``(opt, model)``.

    Raises:
        ValueError: for an unsupported ``opt.model`` (previously enforced
            with ``assert``, which is stripped under ``python -O``).
    """
    if opt.model == 'mobilenetv2':
        model = mobilenet.MobileNetV2(num_classes=opt.n_classes)
        print('net is mobilenetv2!')
        opt.model_save_dir = './weights/mobilenetv2'
    elif opt.model == 'alexnet':
        model = alexnet.AlexNet(num_classes=opt.n_classes, init_weights=True)
        print('net is alexnet!')
        opt.model_save_dir = './weights/alexnet'
    elif opt.model == 'googlenet':
        model = googlenet.GoogLeNet(num_classes=opt.n_classes, init_weights=True)
        print('net is googlenet!')
        opt.model_save_dir = './weights/googlenet'
    elif opt.model == 'lenet':
        model = lenet.LeNet(num_classes=opt.n_classes)
        print('net is lenet!')
        opt.model_save_dir = './weights/lenet'
    elif opt.model == 'resnet34':
        model = resnet.resnet34(num_classes=opt.n_classes)
        print('net is resnet34!')
        opt.model_save_dir = './weights/resnet34'
    elif opt.model == 'resnet101':
        model = resnet.resnet101(num_classes=opt.n_classes)
        print('net is resnet101!')
        opt.model_save_dir = './weights/resnet101'
    elif opt.model == 'vgg11':
        model = vgg.vgg(model_name="vgg11", num_classes=opt.n_classes, init_weights=True)
        print('net is vgg11!')
        opt.model_save_dir = './weights/vgg11'
    elif opt.model == 'vgg13':
        model = vgg.vgg(model_name="vgg13", num_classes=opt.n_classes, init_weights=True)
        print('net is vgg13!')
        opt.model_save_dir = './weights/vgg13'
    elif opt.model == 'vgg16':
        model = vgg.vgg(model_name="vgg16", num_classes=opt.n_classes, init_weights=True)
        print('net is vgg16!')
        opt.model_save_dir = './weights/vgg16'
    elif opt.model == 'vgg19':
        model = vgg.vgg(model_name="vgg19", num_classes=opt.n_classes, init_weights=True)
        print('net is vgg19!')
        opt.model_save_dir = './weights/vgg19'
    elif opt.model == 'mobilenetv3small':
        model = mobilenetv3_small.MobileNetV3_small(num_classes=opt.n_classes)
        print('net is mobilenetv3small!')
        opt.model_save_dir = './weights/mobilenetv3small'
    elif opt.model == 'mobilenetv3wen':
        model = mobilenetv3_wen.MobileNetV3_small(num_classes=opt.n_classes)
        print('net is mobilenetv3wen!')
        opt.model_save_dir = './weights/mobilenetv3wen'
    else:
        raise ValueError('unsupported model: {}'.format(opt.model))
    return opt, model
def __init__(self, layers=50, dropout=0.1, classes=2, use_dct=True, vec_dim=300):
    """Three-stage DCTNet variant (layer0-layer3 only, no layer4).

    Only ResNet-18/34 are actually wired up here: the original code
    asserted ``layers in [18, 34, 50, 101]`` but never built a backbone
    for 50/101 (NameError on ``resnet``) and never set ``fea_dim`` for
    them, so deeper backbones are rejected explicitly.

    Args:
        layers: backbone depth, 18 or 34.
        dropout: dropout probability in the classification head.
        classes: number of segmentation classes (must be > 1).
        use_dct: build the DCT encoding module.
        vec_dim: dimensionality of the DCT vector.

    Raises:
        ValueError: for unsupported ``layers`` or ``classes`` values.
    """
    super(DCTNet, self).__init__()
    if layers not in (18, 34):
        raise ValueError('this DCTNet variant supports layers 18 or 34, got {}'.format(layers))
    if classes <= 1:
        raise ValueError('classes must be > 1, got {}'.format(classes))
    self.use_dct = use_dct
    self.vec_dim = vec_dim
    if layers == 18:
        resnet = models.resnet18(pretrained=False, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    else:
        resnet = models.resnet34(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    # basic-block backbones use a single 7x7 stem conv
    self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool)
    self.layer1, self.layer2, self.layer3 = resnet.layer1, resnet.layer2, resnet.layer3
    fea_dim = 256  # layer3 output width for resnet18/34
    down_dim = fea_dim // 4
    if use_dct:
        self.dct_encoding = DCTModule(vec_dim=self.vec_dim)
    self.up_conv = ConvBNReLU(fea_dim, fea_dim, ks=1, stride=1, padding=0)
    self.cls = nn.Sequential(
        nn.Conv2d(fea_dim, down_dim, kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(down_dim),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=dropout),
        nn.Conv2d(down_dim, classes, kernel_size=1))
def main():
    """Evaluate the perspective checkpoint on the test LMDB split."""
    pth_path = '/home/zhaoliu/car_brand/car_mid/results_perspective/per_1/save_50.pth'
    test_path = "/mnt/disk/zhaoliu_data/perspective/lmdb/test"
    keys_path = '/home/zhaoliu/car_brand/perspective_data/prespective_test.npy'

    # restore the trained weights into a fresh classifier
    model = resume_model(pth_path, resnet34(num_classes=1189))

    test_loader = get_test_utils(test_path, keys_path)
    print('数据加载完毕...')

    test(model, test_loader)
def main():
    """Run the perspective model over the test set and dump bad cases."""
    pth_path = '/home/zhaoliu/car_brand/car_mid/results_perspective/per_1/save_50.pth'
    test_result = '/home/zhaoliu/car_brand/car_mid/badcase_per/'
    test_path = '/mnt/disk/zhaoliu_data/perspective/lmdb/test'
    keys_path = '/home/zhaoliu/car_brand/perspective_data/prespective_test.npy'

    # bad-case outputs are written next to each other under test_result
    midnpy_path = test_result + 'mid_badkeys.npy'
    midtxt_path = test_result + 'mid_badcase_info.txt'

    # restore the trained weights into a fresh classifier
    model = resume_model(pth_path, resnet34(num_classes=1189))

    test_loader = get_test_utils(test_path, keys_path)
    print('数据加载完毕...')

    test(model, test_loader, midnpy_path, midtxt_path)
def __init__(self, layers=50, dropout=0.1, classes=2, fuse=8):
    """TriSeNet1: ResNet backbone with a seg head and optional fuse convs.

    Args:
        layers: backbone depth, one of 18/34/50/101.
        dropout: dropout probability in the classification head.
        classes: number of segmentation classes (must be > 1).
        fuse: fusion level; 16 adds a 1/16-scale conv, 8 adds both
            1/16- and 1/8-scale convs.
    """
    super(TriSeNet1, self).__init__()
    assert layers in [18, 34, 50, 101]
    assert classes > 1
    self.fuse = fuse

    # Backbone
    shallow = layers in (18, 34)
    if layers == 18:
        resnet = models.resnet18(pretrained=False, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    elif layers == 34:
        resnet = models.resnet34(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    elif layers == 50:
        resnet = models.resnet50_semseg(pretrained=True, deep_base=True,
                                        strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4))
    else:
        resnet = models.resnet101_semseg(pretrained=True, deep_base=True,
                                         strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4))

    if shallow:
        # single 7x7 stem conv
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool)
    else:
        # deep-base stem: three stacked 3x3 convs
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                    resnet.conv2, resnet.bn2, resnet.relu,
                                    resnet.conv3, resnet.bn3, resnet.relu, resnet.maxpool)
    self.layer1 = resnet.layer1
    self.layer2 = resnet.layer2
    self.layer3 = resnet.layer3
    self.layer4 = resnet.layer4

    fea_dim, aux_dim = (512, 256) if shallow else (2048, 1024)
    down_dim = fea_dim // 4

    self.cls = nn.Sequential(
        nn.Conv2d(fea_dim, down_dim, kernel_size=3, padding=1, bias=False),
        nn.BatchNorm2d(down_dim),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=dropout),
        nn.Conv2d(down_dim, classes, kernel_size=1),
    )

    if self.fuse in (16, 8):
        self.fuse_16 = nn.Conv2d(fea_dim // 2, classes, kernel_size=1)
    if self.fuse == 8:
        self.fuse_8 = nn.Conv2d(fea_dim // 4, classes, kernel_size=1)
def __init__(self, layers=18, classes=2):
    """TriSeNet: ResNet-18/34 backbone with cross-scale attention fusion.

    Args:
        layers: backbone depth, 18 or 34.
        classes: number of segmentation classes.

    Raises:
        ValueError: if ``layers`` is not 18 or 34 (the original code fell
            through and hit a NameError on ``backbone`` instead).
    """
    super(TriSeNet, self).__init__()
    if layers == 18:
        backbone = models.resnet18(pretrained=True, deep_base=False,
                                   strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    elif layers == 34:
        backbone = models.resnet34(pretrained=True, deep_base=False,
                                   strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1))
    else:
        raise ValueError('TriSeNet supports layers in (18, 34), got {}'.format(layers))
    # the initial layer conv is 7x7, instead of three 3x3
    self.layer0 = nn.Sequential(backbone.conv1, backbone.bn1, backbone.relu,
                                backbone.maxpool)
    # stage channels for resnet18 and resnet34 are: (64, 128, 256, 512)
    self.layer1, self.layer2, self.layer3, self.layer4 \
        = backbone.layer1, backbone.layer2, backbone.layer3, backbone.layer4
    # 1/8-scale features projected down to the 1/32 scale for attention
    self.down_8_32 = DownModule(in_channels=128, out_channels=512, down_scale=4)
    self.relu = nn.ReLU(inplace=True)
    self.sa_8_32 = SelfAttentionBlock(512)
    self.seg_head = SegHead(in_channels=640, mid_channels=256, classes=classes)
def __init__(self, embedding_size, num_classes, backbone='resnet50'):
    """Single-head ResNet wrapper: trunk -> embedding -> classifier.

    Args:
        embedding_size: dimensionality of the embedding layer.
        num_classes: number of output classes.
        backbone: one of resnet18/34/50/101/152.

    Raises:
        RuntimeError: for an unrecognized ``backbone`` name.
    """
    super(background_resnet, self).__init__()
    self.backbone = backbone
    # copying modules from pretrained models
    builders = {
        'resnet18': resnet.resnet18,
        'resnet34': resnet.resnet34,
        'resnet50': resnet.resnet50,
        'resnet101': resnet.resnet101,
        'resnet152': resnet.resnet152,
    }
    if backbone not in builders:
        raise RuntimeError('unknown backbone: {}'.format(backbone))
    self.pretrained = builders[backbone](pretrained=False)
    self.fc0 = nn.Linear(512, embedding_size)
    self.bn0 = nn.BatchNorm1d(embedding_size)
    self.relu = nn.ReLU()
    self.last = nn.Linear(embedding_size, num_classes)
def __init__(self, layers, in_channels=192, strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1),
             pretrained=False, deep_base=False) -> None:
    """ResNet fed with DCT coefficients, keeping stages 1-4 plus the head.

    For 18/34 a small projection (``down_layer``) maps the DCT channels to
    layer1's expected width; for 50/101 layer1's entry conv and downsample
    projection are rebuilt instead.

    Args:
        layers: backbone depth, one of 18/34/50/101.
        in_channels: number of DCT input channels.
        strides: per-stage strides forwarded to the backbone builder.
        dilations: per-stage dilations forwarded to the backbone builder.
        pretrained: load pretrained backbone weights.
        deep_base: use the deep-stem backbone variant.

    Raises:
        ValueError: for an unsupported ``layers`` value (previously an
            unbound ``resnet`` caused a NameError).
    """
    super(ResNetDCT_2345, self).__init__()
    self.layers = layers
    if layers == 18:
        resnet = resnet18(pretrained, deep_base, strides=strides, dilations=dilations)
    elif layers == 34:
        resnet = resnet34(pretrained, deep_base, strides=strides, dilations=dilations)
    elif layers == 50:
        resnet = resnet50(pretrained, deep_base, strides=strides, dilations=dilations)
    elif layers == 101:
        resnet = resnet101(pretrained, deep_base, strides=strides, dilations=dilations)
    else:
        raise ValueError('unsupported layers: {}'.format(layers))
    self.layer1, self.layer2, self.layer3, self.layer4, self.avgpool, self.fc = \
        resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4, resnet.avgpool, resnet.fc
    self.relu = nn.ReLU(inplace=True)
    if layers in [18, 34]:
        # project DCT channels to layer1's input width with a 1x1 conv block
        in_ch = self.layer1[0].conv1.in_channels
        self.down_layer = nn.Sequential(
            nn.Conv2d(in_channels, in_ch, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(in_ch),
            nn.ReLU(inplace=True)
        )
        # initialize the weight for only one layer
        for m in self.down_layer.modules():
            init_weight(m)
    else:
        # bottleneck backbones: rebuild layer1's entry conv for the DCT width
        out_ch = self.layer1[0].conv1.out_channels
        self.layer1[0].conv1 = nn.Conv2d(in_channels, out_ch, kernel_size=1,
                                         stride=1, bias=False)
        init_weight(self.layer1[0].conv1)
        # the residual downsample projection must match the new input width too
        out_ch = self.layer1[0].downsample[0].out_channels
        self.layer1[0].downsample[0] = nn.Conv2d(in_channels, out_ch, kernel_size=1,
                                                 stride=1, bias=False)
        init_weight(self.layer1[0].downsample[0])
def get_model(model_type='resnet50', num_classes=1000):
    """Return a pretrained backbone for the given ``model_type``.

    Unknown names deliberately fall back to resnet50. ``num_classes`` is
    only consumed by the senet154 branch; other builders use their own
    pretrained defaults.
    """
    # TODO: Add more backbones
    if model_type == 'resnet34':
        return resnet.resnet34(pretrained=True)
    if model_type == 'resnet50':
        return resnet.resnet50(pretrained=True)
    if model_type == 'resnet101':
        return resnet.resnet101(pretrained=True)
    if model_type == 'resnet152':
        return resnet.resnet152(pretrained=True)
    if model_type == 'resnext50_32x4d':
        return resnet.resnext50_32x4d(pretrained=True)
    if model_type == 'resnext101_32x8d':
        return resnet.resnext101_32x8d(pretrained=True)
    if model_type == 'res2net_v1b_50':
        return res2net50_v1b_26w_4s(pretrained=True)
    if model_type == 'res2net_v1b_101':
        return res2net101_v1b_26w_4s(pretrained=True)
    if model_type == 'res2net50_26w_4s':
        return res2net50_26w_4s(pretrained=True)
    if model_type == 'res2net101_26w_4s':
        return res2net101_26w_4s(pretrained=True)
    if model_type == 'res2next50':
        return res2next50(pretrained=True)
    if model_type == 'senet154':
        return senet.senet154(num_classes=num_classes, pretrained='imagenet')
    if model_type == 'resnest50':
        return resnest50(pretrained=True)
    if model_type == 'resnest101':
        return resnest101(pretrained=True)
    if model_type == 'resnest200':
        return resnest200(pretrained=True)
    if model_type == 'resnest269':
        return resnest269(pretrained=True)
    # default fallback
    return resnet.resnet50(pretrained=True)
def __init__(self, in_channels=3, n_classes=1, feature_scale=2, is_deconv=True,
             is_batchnorm=True):
    """Classic U-Net encoder/decoder with scaled channel widths.

    Args:
        in_channels: number of input image channels.
        n_classes: number of output classes of the final 1x1 conv.
        feature_scale: divides the base channel widths (64..1024).
        is_deconv: use transposed conv (vs upsampling) in the decoder.
        is_batchnorm: use batch norm inside conv blocks.
    """
    super(UNet, self).__init__()
    self.backbone = resnet34(pretrained=True)
    self.in_channels = in_channels
    self.feature_scale = feature_scale
    self.is_deconv = is_deconv
    self.is_batchnorm = is_batchnorm

    # base widths 64..1024 scaled down by feature_scale
    chs = [int(c / self.feature_scale) for c in (64, 128, 256, 512, 1024)]

    # encoder (downsampling path)
    self.maxpool = nn.MaxPool2d(kernel_size=2)
    self.conv1 = unetConv2(self.in_channels, chs[0], self.is_batchnorm)
    self.conv2 = unetConv2(chs[0], chs[1], self.is_batchnorm)
    self.conv3 = unetConv2(chs[1], chs[2], self.is_batchnorm)
    self.conv4 = unetConv2(chs[2], chs[3], self.is_batchnorm)
    self.center = unetConv2(chs[3], chs[4], self.is_batchnorm)

    # decoder (upsampling path)
    self.up_concat4 = unetUp(chs[4], chs[3], self.is_deconv)
    self.up_concat3 = unetUp(chs[3], chs[2], self.is_deconv)
    self.up_concat2 = unetUp(chs[2], chs[1], self.is_deconv)
    self.up_concat1 = unetUp(chs[1], chs[0], self.is_deconv)

    # final 1x1 conv (no concat)
    self.final = nn.Conv2d(chs[0], n_classes, 1)

    # kaiming-initialise conv and batchnorm weights
    for m in self.modules():
        if isinstance(m, (nn.Conv2d, nn.BatchNorm2d)):
            init_weights(m, init_type='kaiming')
def main(): # Training settings parser = argparse.ArgumentParser(description='SSDA Classification') parser.add_argument('--steps', type=int, default=50000, metavar='N', help='maximum number of iterations ' 'to train (default: 50000)') parser.add_argument( '--method', type=str, default='MME', choices=['S+T', 'ENT', 'MME'], help='MME is proposed method, ENT is entropy minimization,' ' S+T is training only on labeled examples') parser.add_argument('--lr', type=float, default=0.01, metavar='LR', help='learning rate (default: 0.001)') parser.add_argument('--multi', type=float, default=0.1, metavar='MLT', help='learning rate multiplication') parser.add_argument('--T', type=float, default=0.05, metavar='T', help='temperature (default: 0.05)') parser.add_argument('--lamda', type=float, default=0.1, metavar='LAM', help='value of lamda') parser.add_argument('--save_check', action='store_true', default=False, help='save checkpoint or not') parser.add_argument('--checkpath', type=str, default='./save_model_ssda', help='dir to save checkpoint') parser.add_argument('--seed', type=int, default=1, metavar='S', help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=100, metavar='N', help='how many batches to wait before logging ' 'training status') parser.add_argument('--save_interval', type=int, default=500, metavar='N', help='how many batches to wait before saving a model') parser.add_argument('--net', type=str, default='al exnet', help='which network to use') parser.add_argument('--source', type=str, default='real', help='source domain') parser.add_argument('--target', type=str, default='sketch', help='target domain') parser.add_argument('--dataset', type=str, default='multi', help='the name of dataset') parser.add_argument('--num', type=int, default=3, help='number of labeled examples in the target') parser.add_argument('--patience', type=int, default=5, metavar='S', help='early stopping to wait for improvment ' 'before terminating. 
(default: 5 (5000 iterations))') parser.add_argument('--early', action='store_false', default=True, help='early stopping on validation or not') parser.add_argument('--loss', type=str, default='CE', choices=['CE', 'FL', 'CBFL'], help='classifier loss function') parser.add_argument('--beta', type=float, default=0.99, required=False, help='beta value in CBFL loss') parser.add_argument('--gamma', type=float, default=0.5, required=False, help='gamma value in CBFL or FL') parser.add_argument('--reg', type=float, default=0.1, required=False, help='weight of semantic regularizer') parser.add_argument('--attribute', type=str, default=None, help='semantic attribute feature vector to be used') parser.add_argument( '--dim', type=int, default=50, help= 'dimensionality of the feature vector - make sure this in sync with the dim of the semantic attribute vector' ) parser.add_argument('--mode', type=str, default='train', choices=['train', 'infer'], help='mode of script train or infer') # this argument is valid only if the mode is infer parser.add_argument('--model_path', type=str, help='path to the checkpoint of the model') parser.add_argument( '--uda', type=int, default=0, help='unsupervised domain adaptation or not - 0 for ssda and 1 for uda' ) args = parser.parse_args() print('Dataset %s Source %s Target %s Labeled num perclass %s Network %s' % (args.dataset, args.source, args.target, args.num, args.net)) source_loader, target_loader, target_loader_unl, target_loader_val, \ target_loader_test, class_num_list, class_list = return_dataset(args) # class num list is returned for CBFL use_gpu = torch.cuda.is_available() record_dir = 'record/%s/%s' % (args.dataset, args.method) if not os.path.exists(record_dir): os.makedirs(record_dir) record_file = os.path.join( record_dir, '%s_net_%s_%s_to_%s_num_%s' % (args.method, args.net, args.source, args.target, args.num)) if use_gpu: device = 'cuda' else: device = 'cpu' print("Device: %s Loss: %s Attributes: %s" % (device, args.loss, 
args.attribute)) if use_gpu: torch.cuda.manual_seed(args.seed) else: torch.manual_seed(args.seed) if args.net == 'resnet34': G = resnet34() inc = 512 elif args.net == "alexnet": G = AlexNetBase() inc = 4096 elif args.net == "vgg": G = VGGBase() inc = 4096 else: raise ValueError('Model cannot be recognized.') params = [] for key, value in dict(G.named_parameters()).items(): if value.requires_grad: if 'classifier' not in key: params += [{ 'params': [value], 'lr': 0.1, 'weight_decay': 0.0005 }] else: params += [{ 'params': [value], 'lr': 1, 'weight_decay': 0.0005 }] # Setting the predictor layer if args.attribute is not None: if args.net == 'resnet34': F1 = Predictor_deep_attributes(num_class=len(class_list), inc=inc, feat_dim=args.dim) print("Using: Predictor_deep_attributes") else: F1 = Predictor_attributes(num_class=len(class_list), inc=inc, feat_dim=args.dim) print("Using: Predictor_attributes") else: if args.net == 'resnet34': F1 = Predictor_deep(num_class=len(class_list), inc=inc) print("Using: Predictor_deep") else: F1 = Predictor(num_class=len(class_list), inc=inc, temp=args.T) print("Using: Predictor") # Initializing the weights of the prediction layer weights_init(F1) # Setting the prediction layer weights as the semantic attributes if args.attribute is not None: att = np.load('attributes/%s_%s.npy' % (args.dataset, args.attribute)) #att = np.load('attributes/multi_%s.npy'%(args.attribute)) if use_gpu: att = nn.Parameter(torch.cuda.FloatTensor(att)) else: att = nn.Parameter(torch.FloatTensor(att, device="cpu")) if args.net == 'resnet34': F1.fc3.weight = att else: F1.fc2.weight = att print("attribute shape is: ", att.shape) lr = args.lr # If the mode is inference then load the pretrained network if args.mode == 'infer': # loading the model checkpoint main_dict = torch.load(args.model_path) G.load_state_dict(main_dict['G_state_dict']) F1.load_state_dict(main_dict['F_state_dict']) print("Loaded pretrained model weights") G.to(device) F1.to(device) if args.uda 
== 1:  # NOTE(review): fragment starts mid-statement — the opening condition (likely `if args.uda`) is outside this view
    print("Using: Unsupervised domain adaptation")

# Pre-allocated input/label tensors.  Each training step resizes them and
# copies the current batch in-place (legacy PyTorch pattern); `Variable` is
# a no-op wrapper kept from pre-0.4 PyTorch.
im_data_s = torch.FloatTensor(1)
im_data_t = torch.FloatTensor(1)
im_data_tu = torch.FloatTensor(1)
gt_labels_t = torch.LongTensor(1)
gt_labels_s = torch.LongTensor(1)
sample_labels_t = torch.LongTensor(1)
sample_labels_s = torch.LongTensor(1)
im_data_s = im_data_s.to(device)
im_data_t = im_data_t.to(device)
im_data_tu = im_data_tu.to(device)
gt_labels_s = gt_labels_s.to(device)
gt_labels_t = gt_labels_t.to(device)
sample_labels_t = sample_labels_t.to(device)
sample_labels_s = sample_labels_s.to(device)
im_data_s = Variable(im_data_s)
im_data_t = Variable(im_data_t)
im_data_tu = Variable(im_data_tu)
gt_labels_s = Variable(gt_labels_s)
gt_labels_t = Variable(gt_labels_t)
sample_labels_t = Variable(sample_labels_t)
sample_labels_s = Variable(sample_labels_s)

# Make sure the checkpoint directory exists before training starts.
if os.path.exists(args.checkpath) == False:
    os.mkdir(args.checkpath)

# Timestamp used to tag saved checkpoints.
time_stamp = datetime.now()
print(time_stamp)


def train(class_dist_threshold_list):
    """Run the semi-supervised adaptation training loop.

    class_dist_threshold_list: per-class similarity thresholds produced by
    infer() and forwarded to adentropy() for the MME method; None on the
    first (plain) training pass.

    Uses closure state: G, F1, params, args, loaders, class_num_list,
    record_file, device, time_stamp, and the pre-allocated im_data_*/
    gt_labels_* tensors.
    """
    G.train()
    F1.train()
    # Per-parameter-group LRs come from `params`; F1 uses a flat lr of 1.0
    # that is rescaled each step by inv_lr_scheduler below.
    optimizer_g = optim.SGD(params,
                            momentum=0.9,
                            weight_decay=0.0005,
                            nesterov=True)
    optimizer_f = optim.SGD(list(F1.parameters()),
                            lr=1.0,
                            momentum=0.9,
                            weight_decay=0.0005,
                            nesterov=True)

    def zero_grad_all():
        # Clear gradients on both optimizers at once.
        optimizer_g.zero_grad()
        optimizer_f.zero_grad()

    # Remember each group's base LR so the scheduler can rescale from it.
    param_lr_g = []
    for param_group in optimizer_g.param_groups:
        param_lr_g.append(param_group["lr"])
    param_lr_f = []
    for param_group in optimizer_f.param_groups:
        param_lr_f.append(param_group["lr"])
    # Setting the loss function to be used for the classification loss
    if args.loss == 'CE':
        criterion = nn.CrossEntropyLoss().to(device)
    if args.loss == 'FL':
        criterion = FocalLoss(alpha=1, gamma=args.gamma).to(device)
    if args.loss == 'CBFL':
        # Calculating the list having the number of examples per class which
        # is going to be used in the CB focal loss (class-balanced weights).
        beta = args.beta
        effective_num = 1.0 - np.power(beta, class_num_list)
        per_cls_weights = (1.0 - beta) / np.array(effective_num)
        per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(
            class_num_list)
        per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
        criterion = CBFocalLoss(weight=per_cls_weights,
                                gamma=args.gamma).to(device)
    all_step = args.steps
    data_iter_s = iter(source_loader)
    data_iter_t = iter(target_loader)
    data_iter_t_unl = iter(target_loader_unl)
    len_train_source = len(source_loader)
    len_train_target = len(target_loader)
    len_train_target_semi = len(target_loader_unl)
    best_acc = 0
    counter = 0
    # Disabled warm-start path kept for reference.
    """
    x = torch.load("./freezed_models/alexnet_p2r.ckpt.best.pth.tar")
    G.load_state_dict(x['G_state_dict'])
    F1.load_state_dict(x['F1_state_dict'])
    optimizer_f.load_state_dict(x['optimizer_f'])
    optimizer_g.load_state_dict(x['optimizer_g'])
    """
    reg_weight = args.reg
    for step in range(all_step):
        # inv_lr_scheduler rescales each group's LR in place and returns the
        # optimizer; see its definition elsewhere in the project.
        optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
                                       init_lr=args.lr)
        optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
                                       init_lr=args.lr)
        lr = optimizer_f.param_groups[0]['lr']
        # condition for restarting the iteration for each of the data loaders
        if step % len_train_target == 0:
            data_iter_t = iter(target_loader)
        if step % len_train_target_semi == 0:
            data_iter_t_unl = iter(target_loader_unl)
        if step % len_train_source == 0:
            data_iter_s = iter(source_loader)
        data_t = next(data_iter_t)
        data_t_unl = next(data_iter_t_unl)
        data_s = next(data_iter_s)
        # Copy the batch into the pre-allocated tensors without tracking grads.
        with torch.no_grad():
            im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
            gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
            im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
            gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
            im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
        zero_grad_all()
        if args.uda == 1:
            # Unsupervised DA: labeled data comes from the source only.
            data = im_data_s
            target = gt_labels_s
        else:
            # Semi-supervised DA: concatenate labeled source + labeled target.
            data = torch.cat((im_data_s, im_data_t), 0)
            target = torch.cat((gt_labels_s, gt_labels_t), 0)
        #print(data.shape)
        output = G(data)
        out1 = F1(output)
        if args.attribute is not None:
            # Regularize the attribute-initialized classifier weights toward
            # the semantic attribute matrix `att`; the weight layer name
            # depends on the backbone's predictor depth.
            if args.net == 'resnet34':
                reg_loss = regularizer(F1.fc3.weight, att)
                loss = criterion(out1, target) + reg_weight * reg_loss
            else:
                reg_loss = regularizer(F1.fc2.weight, att)
                loss = criterion(out1, target) + reg_weight * reg_loss
        else:
            reg_loss = torch.tensor(0)
            loss = criterion(out1, target)
        if args.attribute is not None:
            # Halve the regularization weight every save_interval steps.
            if step % args.save_interval == 0 and step != 0:
                reg_weight = 0.5 * reg_weight
                print("Reduced Reg weight to: ", reg_weight)
        loss.backward(retain_graph=True)
        optimizer_g.step()
        optimizer_f.step()
        zero_grad_all()
        if not args.method == 'S+T':
            # Second pass on unlabeled target data for the entropy-based terms.
            output = G(im_data_tu)
            if args.method == 'ENT':
                loss_t = entropy(F1, output, args.lamda)
                #print(loss_t.cpu().data.item())
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            elif args.method == 'MME':
                loss_t = adentropy(F1, output, args.lamda,
                                   class_dist_threshold_list)
                loss_t.backward()
                optimizer_f.step()
                optimizer_g.step()
            else:
                raise ValueError('Method cannot be recognized.')
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Reg: {:.6f} Loss T {:.6f} ' \
                        'Method {}\n'.format(args.source, args.target, step, lr,
                                             loss.data,
                                             reg_weight * reg_loss.data,
                                             -loss_t.data, args.method)
        else:
            log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                        'Loss Classification: {:.6f} Reg: {:.6f} Method {}\n'.\
                format(args.source, args.target, step, lr, loss.data,
                       reg_weight * reg_loss.data, args.method)
        G.zero_grad()
        F1.zero_grad()
        zero_grad_all()
        if step % args.log_interval == 0:
            print(log_train)
        if step % args.save_interval == 0 and step > 0:
            # Periodic evaluation; model selection is on validation accuracy.
            loss_val, acc_val = test(target_loader_val)
            loss_test, acc_test = test(target_loader_test)
            G.train()
            F1.train()
            if acc_val >= best_acc:
                best_acc = acc_val
                best_acc_test = acc_test
                counter = 0
            else:
                counter += 1
            if args.early:
                # Early stop after `patience` evaluations without improvement.
                if counter > args.patience:
                    break
            print('best acc test %f best acc val %f' % (best_acc_test,
                                                        acc_val))
            print('record %s' % record_file)
            with open(record_file, 'a') as f:
                f.write('step %d best %f final %f \n' % (step, best_acc_test,
                                                         acc_val))
            G.train()
            F1.train()
            #saving model as a checkpoint dict having many things
            if args.save_check:
                print('saving model')
                is_best = True if counter == 0 else False
                save_mymodel(
                    args, {
                        'step': step,
                        'arch': args.net,
                        'G_state_dict': G.state_dict(),
                        'F1_state_dict': F1.state_dict(),
                        'best_acc_test': best_acc_test,
                        'optimizer_g': optimizer_g.state_dict(),
                        'optimizer_f': optimizer_f.state_dict(),
                    }, is_best, time_stamp)


# defining the function for in training validation and testing
def test(loader):
    """Evaluate G+F1 on `loader`; returns (avg loss, accuracy %).

    Side effect: saves the confusion matrix to cf_target.npy.
    """
    G.eval()
    F1.eval()
    test_loss = 0
    correct = 0
    size = 0
    num_class = len(class_list)
    output_all = np.zeros((0, num_class))
    # Setting the loss function to be used for the classification loss
    if args.loss == 'CE':
        criterion = nn.CrossEntropyLoss().to(device)
    if args.loss == 'FL':
        criterion = FocalLoss(alpha=1, gamma=args.gamma).to(device)
    if args.loss == 'CBFL':
        # Calculating the list having the number of examples per class which
        # is going to be used in the CB focal loss
        beta = args.beta
        effective_num = 1.0 - np.power(beta, class_num_list)
        per_cls_weights = (1.0 - beta) / np.array(effective_num)
        per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(
            class_num_list)
        per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
        criterion = CBFocalLoss(weight=per_cls_weights,
                                gamma=args.gamma).to(device)
    confusion_matrix = torch.zeros(num_class, num_class)
    with torch.no_grad():
        for batch_idx, data_t in enumerate(loader):
            im_data_t.data.resize_(data_t[0].size()).copy_(data_t[0])
            gt_labels_t.data.resize_(data_t[1].size()).copy_(data_t[1])
            feat = G(im_data_t)
            output1 = F1(feat)
            output_all = np.r_[output_all, output1.data.cpu().numpy()]
            size += im_data_t.size(0)
            pred1 = output1.data.max(1)[1]
            for t, p in zip(gt_labels_t.view(-1), pred1.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1
            correct += pred1.eq(gt_labels_t.data).cpu().sum()
            test_loss += criterion(output1, gt_labels_t) / len(loader)
    np.save("cf_target.npy", confusion_matrix)
    #print(confusion_matrix)
    print('\nTest set: Average loss: {:.4f}, '
          'Accuracy: {}/{} F1 ({:.0f}%)\n'.format(test_loss, correct, size,
                                                  100. * correct / size))
    return test_loss.data, 100. * float(correct) / size


# defining the function for inference which is similar to the testing function
# as above but with some additional functionality for calculating the
# distances between the class prototypes and the predicted testing samples
def infer(loader):
    """Evaluate like test() and additionally derive per-class thresholds.

    Returns (avg loss, accuracy %, class_dist_threshold_list) where the
    threshold for each class is the value at the ceil(10%) index of that
    class's sorted max-logit scores.
    """
    G.eval()
    F1.eval()
    test_loss = 0
    correct = 0
    size = 0
    num_class = len(class_list)
    output_all = np.zeros((0, num_class))
    # Setting the loss function to be used for the classification loss.
    # NOTE(review): unlike train()/test(), this uses hard-coded gamma/beta
    # values instead of args.gamma/args.beta — verify this is intentional.
    if args.loss == 'CE':
        criterion = nn.CrossEntropyLoss().to(device)
    if args.loss == 'FL':
        criterion = FocalLoss(alpha=1, gamma=1).to(device)
    if args.loss == 'CBFL':
        # Calculating the list having the number of examples per class which
        # is going to be used in the CB focal loss
        beta = 0.99
        effective_num = 1.0 - np.power(beta, class_num_list)
        per_cls_weights = (1.0 - beta) / np.array(effective_num)
        per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(
            class_num_list)
        per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
        criterion = CBFocalLoss(weight=per_cls_weights,
                                gamma=0.5).to(device)
    # defining a nested list to store the cosine similarity (or distances) of
    # the vectors from the class prototypes
    class_dist_list = []
    for i in range(num_class):
        empty_dists = []
        class_dist_list.append(empty_dists)
    confusion_matrix = torch.zeros(num_class, num_class)
    # iterating through the elements of the batch in the dataloader
    with torch.no_grad():
        for batch_idx, data_t in enumerate(loader):
            im_data_t.data.resize_(data_t[0].size()).copy_(data_t[0])
            gt_labels_t.data.resize_(data_t[1].size()).copy_(data_t[1])
            feat = G(im_data_t)
            output1 = F1(feat)
            output_all = np.r_[output_all, output1.data.cpu().numpy()]
            size += im_data_t.size(0)
            pred1 = output1.data.max(1)[1]
            # filling the elements of the confusion matrix
            for t, p in zip(gt_labels_t.view(-1), pred1.view(-1)):
                confusion_matrix[t.long(), p.long()] += 1
            correct += pred1.eq(gt_labels_t.data).cpu().sum()
            test_loss += criterion(output1, gt_labels_t) / len(loader)
            pred1 = pred1.cpu().numpy()
            # max logit per sample, used as the similarity to the predicted
            # class prototype
            dists = output1.data.max(1)[0]
            dists = dists.cpu().numpy()
            # forming the lists of the distances of the predicted labels and
            # the class prototype
            for label, dist in zip(pred1, dists):
                label = int(label)
                class_dist_list[label].append(dist)
    # sorting the distances in ascending order for each of the classes, also
    # finding a threshold for similarity of each class
    summ = 0  # NOTE(review): unused accumulator
    class_dist_threshold_list = []
    for class_ in range(len(class_dist_list)):
        class_dist_list[class_].sort()
        l = len(class_dist_list[class_])
        tenth = l / 10
        idx_tenth = math.ceil(tenth)
        # NOTE(review): raises IndexError when a class has no predictions
        # (l == 0) — confirm every class is predicted at least once.
        class_dist_threshold_list.append(
            class_dist_list[class_][idx_tenth])
    print('\nTest set: Average loss: {:.4f}, '
          'Accuracy: {}/{} F1 ({:.2f}%)\n'.format(test_loss, correct, size,
                                                  100. * correct / size))
    return test_loss.data, 100. * float(
        correct) / size, class_dist_threshold_list


# choosing the mode of the model - whether to be used for training or for
# inference
if args.mode == 'train':
    print("Training the model...")
    train(None)
if args.mode == 'infer':
    print("Infering from the model...")
    # First derive per-class thresholds on the test set, then retrain with
    # them feeding the MME entropy-maximization term.
    _, _, class_dist_threshold_list = infer(target_loader_test)
    print(
        "Starting model retraining using weights for entropy maximization..."
    )
    train(class_dist_threshold_list)
if not os.path.exists(tf_record): os.mkdir(tf_record) tf_record = os.path.join(tf_record, args.arch + '_' + args.method) writer = SummaryWriter(tf_record) else: writer = None # batch size train_batchSize = [args.label_batch_size, args.unlabel_batch_size] # backbone architecture if args.arch == 'resnet18': backbone = resnet.resnet18(feature_len=args.feat_len) elif args.arch == 'resnet34': backbone = resnet.resnet34(feature_len=args.feat_len) elif args.arch == 'resnet50': backbone = resnet.resnet50(feature_len=args.feat_len) elif args.arch == 'resnet101': backbone = resnet.resnet101(feature_len=args.feat_len) elif args.arch == 'resnet152': backbone = resnet.resnet152(feature_len=args.feat_len) elif args.arch == 'usr': backbone = model_usr else: raise NameError( 'Arch %s is not support. Please enter from [resnet18, resnet34, resnet50, resnet101, resnet152, usr]' % args.arch) # head model_head = arc.ArcMarginProduct_virface(in_features=args.feat_len,
def __init__(self, layers=50, dropout=0.1, classes=2, zoom_factor=8,
             reduction=2, use_scale=True, mode='embedded_gaussian'):
    """Non-local segmentation head over a ResNet backbone.

    layers: ResNet depth (18/34/50/101); shallow nets use the plain stem,
        deep ones use the dilated "semseg" variants with a 3-conv stem.
    dropout: dropout probability in the classification heads.
    classes: number of output classes (must be > 1).
    zoom_factor: output upsampling factor (1/2/4/8).
    reduction, use_scale, mode: forwarded to the NonLocal2d block.
    """
    super(Nonlocal, self).__init__()
    assert layers in [18, 34, 50, 101]
    assert classes > 1
    assert zoom_factor in [1, 2, 4, 8]
    self.zoom_factor = zoom_factor
    self.reduction = reduction
    self.use_scale = use_scale
    self.mode = mode

    shallow = layers in (18, 34)
    if layers == 18:
        resnet = models.resnet18(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2),
                                 dilations=(1, 1, 1, 1))
    elif layers == 34:
        resnet = models.resnet34(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2),
                                 dilations=(1, 1, 1, 1))
    elif layers == 50:
        resnet = models.resnet50_semseg(pretrained=True, deep_base=True,
                                        strides=(1, 2, 1, 1),
                                        dilations=(1, 1, 2, 4))
    else:  # layers == 101 (guaranteed by the assert above)
        resnet = models.resnet101_semseg(pretrained=True, deep_base=True,
                                         strides=(1, 2, 1, 1),
                                         dilations=(1, 1, 2, 4))

    # Stem and feature dimensions depend only on shallow vs. deep.
    if shallow:
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                    resnet.maxpool)
        fea_dim, aux_dim = 512, 256
    else:
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                    resnet.conv2, resnet.bn2, resnet.relu,
                                    resnet.conv3, resnet.bn3, resnet.relu,
                                    resnet.maxpool)
        fea_dim, aux_dim = 2048, 1024
    self.layer1 = resnet.layer1
    self.layer2 = resnet.layer2
    self.layer3 = resnet.layer3
    self.layer4 = resnet.layer4

    down_dim = fea_dim // 4
    self.conv1 = ConvBNReLU(fea_dim, down_dim, ks=3, stride=1, padding=1)
    self.nl_block = NonLocal2d(in_channels=down_dim,
                               reduction=self.reduction,
                               use_scale=self.use_scale,
                               norm_cfg=dict(type='SyncBN',
                                             requires_grad=True),
                               mode=self.mode)
    self.conv2 = ConvBNReLU(down_dim, down_dim, ks=3, stride=1, padding=1)
    # Main classifier consumes backbone features concatenated with the
    # non-local branch output.
    self.cls = nn.Sequential(
        nn.Conv2d(fea_dim + down_dim, down_dim, kernel_size=3, padding=1,
                  bias=False),
        nn.BatchNorm2d(down_dim),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=dropout),
        nn.Conv2d(down_dim, classes, kernel_size=1))
    # NOTE: nn.Module defaults to training=True during construction, so the
    # auxiliary head is always built here.
    if self.training:
        self.aux = nn.Sequential(
            nn.Conv2d(aux_dim, aux_dim // 4, kernel_size=3, padding=1,
                      bias=False),
            nn.BatchNorm2d(aux_dim // 4),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=dropout),
            nn.Conv2d(aux_dim // 4, classes, kernel_size=1))
def __init__(self, layers=50, dropout=0.1, classes=2, vec_dim=300): super(DCTNet, self).__init__() assert layers in [18, 34, 50, 101] assert classes > 1 self.vec_dim = vec_dim # Backbone if layers == 18: resnet = models.resnet18(pretrained=False, deep_base=False, strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1)) elif layers == 34: resnet = models.resnet34(pretrained=True, deep_base=False, strides=(1, 2, 2, 2), dilations=(1, 1, 1, 1)) elif layers == 50: resnet = models.resnet50_semseg(pretrained=True, deep_base=True, strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4)) elif layers == 101: resnet = models.resnet101_semseg(pretrained=True, deep_base=True, strides=(1, 2, 1, 1), dilations=(1, 1, 2, 4)) if layers == 18 or layers == 34: self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool) else: self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu, resnet.conv2, resnet.bn2, resnet.relu, resnet.conv3, resnet.bn3, resnet.relu, resnet.maxpool) self.layer1, self.layer2, self.layer3, self.layer4 = resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4 if layers == 18 or layers == 34: fea_dim = 512 aux_dim = 256 else: fea_dim = 2048 aux_dim = 1024 down_dim = fea_dim // 4 self.dct = nn.ModuleList() for i in range(6, 10): # the number of in_channels is 2^i self.dct.append( DCTBlock( in_channels=2**i, mid_channels=32, # channels can be changed if you want. up_flag=False if i == 9 else True, up_channels=2**i + 2**(i + 1), out_channels=2**i, vec_dim=self.vec_dim)) self.cls = nn.Sequential( nn.Conv2d(self.vec_dim, down_dim, kernel_size=3, padding=1, bias=False), nn.BatchNorm2d(down_dim), nn.ReLU(inplace=True), nn.Dropout2d(p=dropout), nn.Conv2d(down_dim, classes, kernel_size=1)) if self.training: self.aux = nn.Sequential( nn.Conv2d(aux_dim, aux_dim // 4, kernel_size=3, padding=1, bias=False), nn.BatchNorm2d(aux_dim // 4), nn.ReLU(inplace=True), nn.Dropout2d(p=dropout), nn.Conv2d(aux_dim // 4, classes, kernel_size=1))
self.gamma = gamma self.weight = weight def forward(self, input, target): """ input: [N, C] target: [N, ] """ logpt = F.log_softmax(input, dim=1) pt = torch.exp(logpt) logpt = (1 - pt)**self.gamma * logpt loss = F.nll_loss(logpt, target, self.weight) return loss model = nn.DataParallel(resnet34()).to(device) print(model) #criterion1 = nn.CrossEntropyLoss() criterion1 = FocalLoss() criterion2 = nn.SmoothL1Loss() optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4) exp_lr_scheduler = lr_scheduler.MultiStepLR(optimizer, [20, 45, 60], gamma=0.2) #Train total_step = len(train_loader) curr_lr = learning_rate
Returns: return a bilinear filter tensor """ factor = (kernel_size + 1) // 2 if kernel_size % 2 == 1: center = factor - 1 else: center = factor - 0.5 og = np.ogrid[:kernel_size, :kernel_size] bilinear_filter = (1 - abs(og[0] - center) / factor) * (1 - abs(og[1] - center) / factor) weight = np.zeros((in_channels, out_channels, kernel_size, kernel_size), dtype=np.float32) weight[range(in_channels), range(out_channels), :, :] = bilinear_filter return torch.from_numpy(weight) pretrained_net = resnet.resnet34(pretrained=True) pretrained_net.load_state_dict(torch.load(r'./pth/resnet34-333f7ec4.pth')) class FcnResNet(nn.Module): def __init__(self, num_classes): super().__init__() self.stage1 = nn.Sequential(*list(pretrained_net.children())[:-4]) self.stage2 = list(pretrained_net.children())[-4] self.stage3 = list(pretrained_net.children())[-3] self.scores1 = nn.Conv2d(512, num_classes, 1) self.scores2 = nn.Conv2d(256, num_classes, 1) self.scores3 = nn.Conv2d(128, num_classes, 1)
def __init__(self, layers=50, dropout=0.1, classes=2, zoom_factor=8):
    """Dual-attention (PAM + CAM) segmentation head over a ResNet backbone.

    layers: ResNet depth (18/34/50/101).
    dropout: dropout probability in the classification heads.
    classes: number of output classes (must be > 1).
    zoom_factor: output upsampling factor (1/2/4/8).
    """
    super(DANet, self).__init__()
    assert layers in [18, 34, 50, 101]
    assert classes > 1
    assert zoom_factor in [1, 2, 4, 8]
    self.zoom_factor = zoom_factor

    shallow = layers in (18, 34)
    if layers == 18:
        resnet = models.resnet18(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2),
                                 dilations=(1, 1, 1, 1))
    elif layers == 34:
        resnet = models.resnet34(pretrained=True, deep_base=False,
                                 strides=(1, 2, 2, 2),
                                 dilations=(1, 1, 1, 1))
    elif layers == 50:
        resnet = models.resnet50_semseg(pretrained=True, deep_base=True,
                                        strides=(1, 2, 1, 1),
                                        dilations=(1, 1, 2, 4))
    else:  # layers == 101 (guaranteed by the assert above)
        resnet = models.resnet101_semseg(pretrained=True, deep_base=True,
                                         strides=(1, 2, 1, 1),
                                         dilations=(1, 1, 2, 4))

    # Stem and feature dimensions depend only on shallow vs. deep.
    if shallow:
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                    resnet.maxpool)
        fea_dim, aux_dim = 512, 256
    else:
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                    resnet.conv2, resnet.bn2, resnet.relu,
                                    resnet.conv3, resnet.bn3, resnet.relu,
                                    resnet.maxpool)
        fea_dim, aux_dim = 2048, 1024
    self.layer1 = resnet.layer1
    self.layer2 = resnet.layer2
    self.layer3 = resnet.layer3
    self.layer4 = resnet.layer4

    down_dim = fea_dim // 4

    def make_seg_head():
        # Fresh dropout + 1x1 classifier head (one per attention branch).
        return nn.Sequential(
            nn.Dropout2d(p=dropout),
            nn.Conv2d(down_dim, classes, kernel_size=1))

    # Position-attention branch.
    self.pam_in_conv = ConvBNReLU(fea_dim, down_dim, ks=3, stride=1,
                                  padding=1)
    self.pam = PAM(down_dim)
    self.pam_out_conv = ConvBNReLU(down_dim, down_dim, ks=3, stride=1,
                                   padding=1)
    self.pam_cls_seg = make_seg_head()
    # Channel-attention branch.
    self.cam_in_conv = ConvBNReLU(fea_dim, down_dim, ks=3, stride=1,
                                  padding=1)
    self.cam = CAM(down_dim)
    self.cam_out_conv = ConvBNReLU(down_dim, down_dim, ks=3, stride=1,
                                   padding=1)
    self.cam_cls_seg = make_seg_head()
    # Fused-branch classifier.
    self.cls_seg = make_seg_head()
    # NOTE: nn.Module defaults to training=True during construction, so the
    # auxiliary head is always built here.
    if self.training:
        self.aux = nn.Sequential(
            nn.Conv2d(aux_dim, aux_dim // 4, kernel_size=3, padding=1,
                      bias=False),
            nn.BatchNorm2d(aux_dim // 4),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=dropout),
            nn.Conv2d(aux_dim // 4, classes, kernel_size=1))
'activation should be relu/elu/leakyrelu/rrelu/sigmoid/tanh') # 设置是否使用多一层隐藏层+dropout hidden = int(args.hidden) # 选择模型 if args.layer == '18': net = resnet18(pretrained=False, progress=True, activate=activate, hidden=hidden, num_classes=10) elif args.layer == '34': net = resnet34(pretrained=False, progress=True, activate=activate, hidden=hidden, num_classes=10) elif args.layer == '50': net = resnet50(pretrained=False, progress=True, activate=activate, hidden=hidden, num_classes=10) elif args.layer == '101': net = resnet101(pretrained=False, progress=True, activate=activate, hidden=hidden, num_classes=10) elif args.layer == '152':
record_dir = 'record/' if not os.path.exists(record_dir): os.makedirs(record_dir) record_file = os.path.join( record_dir, '%s_net_%s_%s_to_%s_num_%s' % (args.method, args.net, args.source, args.target, args.num)) torch.cuda.manual_seed(args.seed) print('Source %s Target %s Labeled num perclass %s Network %s' % (args.source, args.target, args.num, args.net)) source_loader, target_loader, target_loader_unl, target_loader_val, \ target_loader_test, class_list = return_dataset(args) G = resnet34() # feature generator inc = 512 params = [] for key, value in dict(G.named_parameters()).items(): if value.requires_grad: if 'classifier' not in key: params += [{ 'params': [value], 'lr': args.multi, 'weight_decay': 0.0005 }] else: params += [{ 'params': [value], 'lr': args.multi * 10, 'weight_decay': 0.0005
train_sampler = SubsetRandomSampler(train_idx) valid_sampler = SubsetRandomSampler(valid_idx) # prepare data loaders (combine dataset and smapler) train_loader = DataLoader(dataset_train, batch_size=64, sampler=train_sampler, num_workers=4) valid_loader = DataLoader(dataset_train, batch_size=64, sampler=valid_sampler, num_workers=4) model = resnet34().to(device) print(model) criterion1 = nn.CrossEntropyLoss() criterion2 = nn.MSELoss() optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4) exp_lr_scheduler = lr_scheduler.MultiStepLR(optimizer, [10, 25, 35, 45], gamma=0.2) #Train total_step = len(train_loader) curr_lr = learning_rate
print('Dataset %s Source %s Target %s Labeled num perclass %s Network %s' % (args.dataset, args.source, args.target, args.num, args.net)) source_loader, target_loader, target_loader_unl, target_loader_val, \ target_loader_test, class_list = return_dataset(args) use_gpu = torch.cuda.is_available() record_dir = 'record/%s/%s' % (args.dataset, args.method) if not os.path.exists(record_dir): os.makedirs(record_dir) record_file = os.path.join(record_dir, '%s_net_%s_%s_to_%s_num_%s' % (args.method, args.net, args.source, args.target, args.num)) torch.cuda.manual_seed(args.seed) if args.net == 'resnet34': G = resnet34() inc = 512 elif args.net == "alexnet": G = AlexNetBase() inc = 4096 elif args.net == "vgg": G = VGGBase() inc = 4096 else: raise ValueError('Model cannot be recognized.') params = [] for key, value in dict(G.named_parameters()).items(): if value.requires_grad: if 'classifier' not in key: params += [{'params': [value], 'lr': args.multi,
def main():
    """Stage-2 SSDA retraining driver.

    Parses CLI options, restores a stage-1 checkpoint for G/F1 and both
    optimizers, loads a per-image weight table from a text file, and retrains
    with weighted entropy maximization (MME) on unlabeled target data.
    """
    # Training settings
    parser = argparse.ArgumentParser(description='SSDA Classification')
    parser.add_argument('--steps', type=int, default=50000, metavar='N',
                        help='maximum number of iterations '
                        'to train (default: 50000)')
    parser.add_argument(
        '--method', type=str, default='MME',
        choices=['S+T', 'ENT', 'MME'],
        help='MME is proposed method, ENT is entropy minimization,'
        ' S+T is training only on labeled examples')
    # NOTE(review): help text says 0.001 but the default is 0.01 — confirm.
    parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
                        help='learning rate (default: 0.001)')
    parser.add_argument('--multi', type=float, default=0.1, metavar='MLT',
                        help='learning rate multiplication')
    parser.add_argument('--T', type=float, default=0.05, metavar='T',
                        help='temperature (default: 0.05)')
    parser.add_argument('--lamda', type=float, default=0.1, metavar='LAM',
                        help='value of lamda')
    parser.add_argument('--save_check', action='store_true', default=False,
                        help='save checkpoint or not')
    parser.add_argument('--checkpath', type=str, default='./save_model_ssda',
                        help='dir to save checkpoint')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--log-interval', type=int, default=100, metavar='N',
                        help='how many batches to wait before logging '
                        'training status')
    parser.add_argument('--save_interval', type=int, default=500, metavar='N',
                        help='how many batches to wait before saving a model')
    parser.add_argument('--net', type=str, default='alexnet',
                        help='which network to use')
    parser.add_argument('--source', type=str, default='real',
                        help='source domain')
    parser.add_argument('--target', type=str, default='sketch',
                        help='target domain')
    parser.add_argument('--dataset', type=str, default='multi',
                        choices=['multi', 'office', 'office_home'],
                        help='the name of dataset')
    parser.add_argument('--num', type=int, default=3,
                        help='number of labeled examples in the target')
    parser.add_argument('--patience', type=int, default=5, metavar='S',
                        help='early stopping to wait for improvment '
                        'before terminating. (default: 5 (5000 iterations))')
    parser.add_argument('--early', action='store_false', default=True,
                        help='early stopping on validation or not')
    parser.add_argument('--loss', type=str, default='CE',
                        choices=['CE', 'FL', 'CBFL'],
                        help='classifier loss function')
    parser.add_argument(
        '--attribute', type=str, default=None,
        choices=[
            'word2vec', 'glove.6B.100d.txt', 'glove.6B.300d.txt',
            'glove_anurag', 'fasttext_anurag', 'glove.840B.300d.txt',
            'glove.twitter.27B.200d.txt', 'glove.twitter.27B.50d.txt',
            'glove.42B.300d.txt', 'glove.6B.200d.txt', 'glove.6B.50d.txt',
            'glove.twitter.27B.100d.txt', 'glove.twitter.27B.25d.txt'
        ],
        help='semantic attribute feature vector to be used')
    parser.add_argument(
        '--dim', type=int, default=300,
        help=
        'dimensionality of the feature vector - make sure this in sync with the dim of the semantic attribute vector'
    )
    parser.add_argument(
        '--deep', type=int, default=0,
        help='type of classification predictor - 0 for shallow, 1 for deep')
    parser.add_argument('--mode', type=str, default='train',
                        choices=['train', 'infer'],
                        help='mode of script train or infer')
    args = parser.parse_args()
    print('Dataset %s Source %s Target %s Labeled num perclass %s Network %s' %
          (args.dataset, args.source, args.target, args.num, args.net))
    # class num list is returned for CBFL
    source_loader, target_loader, target_loader_unl, target_loader_val, \
        target_loader_test, class_num_list, class_list = return_dataset(args)
    use_gpu = torch.cuda.is_available()
    record_dir = 'record/%s/%s' % (args.dataset, args.method)
    if not os.path.exists(record_dir):
        os.makedirs(record_dir)
    record_file = os.path.join(
        record_dir, '%s_net_%s_%s_to_%s_num_%s' %
        (args.method, args.net, args.source, args.target, args.num))
    if use_gpu:
        device = 'cuda'
    else:
        device = 'cpu'
    print("Device: %s Loss: %s Attributes: %s" %
          (device, args.loss, args.attribute))
    if use_gpu:
        torch.cuda.manual_seed(args.seed)
    else:
        torch.manual_seed(args.seed)
    # Feature extractor selection; `inc` is its output feature width.
    if args.net == 'resnet34':
        G = resnet34()
        inc = 512
    elif args.net == "alexnet":
        G = AlexNetBase()
        inc = 4096
    elif args.net == "vgg":
        G = VGGBase()
        inc = 4096
    else:
        raise ValueError('Model cannot be recognized.')
    # Per-parameter LR groups: classifier params get 10x the backbone LR.
    params = []
    for key, value in dict(G.named_parameters()).items():
        if value.requires_grad:
            if 'classifier' not in key:
                params += [{
                    'params': [value],
                    'lr': 0.1,
                    'weight_decay': 0.0005
                }]
            else:
                params += [{
                    'params': [value],
                    'lr': 1,
                    'weight_decay': 0.0005
                }]
    # Setting the predictor layer
    if args.attribute is not None:
        if args.deep:
            F1 = Predictor_deep_attributes(num_class=len(class_list),
                                           inc=inc,
                                           feat_dim=args.dim)
            print("Using: Predictor_deep_attributes")
        else:
            F1 = Predictor_attributes(num_class=len(class_list),
                                      inc=inc,
                                      feat_dim=args.dim)
            print("Using: Predictor_attributes")
    else:
        if args.deep:
            F1 = Predictor_deep(num_class=len(class_list), inc=inc)
            print("Using: Predictor_deep")
        else:
            F1 = Predictor(num_class=len(class_list), inc=inc, temp=args.T)
            print("Using: Predictor")
    # Initializing the weights of the prediction layer
    weights_init(F1)
    # Setting the prediction layer weights as the semantic attributes
    if args.attribute is not None:
        att = np.load('attributes/%s_%s.npy' % (args.dataset, args.attribute))
        if use_gpu:
            att = nn.Parameter(torch.cuda.FloatTensor(att))
        else:
            # NOTE(review): legacy FloatTensor constructor with a device
            # keyword — verify this still works on the pinned torch version.
            att = nn.Parameter(torch.FloatTensor(att, device="cpu"))
        if args.deep:
            F1.fc3.weight = att
        else:
            F1.fc2.weight = att
        print("attribute shape is: ", att.shape)
    lr = args.lr
    # loading the model checkpoint - and printing some parameters relating to
    # the checkpoints
    main_dict = torch.load(args.checkpath + "/" + args.net + "_" +
                           args.method + "_" + args.source + "_" +
                           args.target + ".ckpt.best.pth.tar")
    G.load_state_dict(main_dict['G_state_dict'])
    F1.load_state_dict(main_dict['F1_state_dict'])
    print("Loaded pretrained model weights")
    print("Loaded weights from step: ", main_dict['step'])
    print("Current best test acc is: ", main_dict['best_acc_test'])
    G.to(device)
    F1.to(device)
    # Loading the txt file having the weights and paths of the image file as
    # a data frame (columns: 0 = weight, 3 = image path).
    df = pd.read_csv(args.method + '_' + main_dict['arch'] + '_' +
                     str(main_dict['step']) + '.txt',
                     sep=" ",
                     header=None)
    df = df[[3, 0]]
    df = df.rename(columns={3: "img", 0: "weight"})
    # Pre-allocated input/label tensors, filled in-place each step.
    im_data_s = torch.FloatTensor(1)
    im_data_t = torch.FloatTensor(1)
    im_data_tu = torch.FloatTensor(1)
    gt_labels_t = torch.LongTensor(1)
    gt_labels_s = torch.LongTensor(1)
    sample_labels_t = torch.LongTensor(1)
    sample_labels_s = torch.LongTensor(1)
    im_data_s = im_data_s.to(device)
    im_data_t = im_data_t.to(device)
    im_data_tu = im_data_tu.to(device)
    gt_labels_s = gt_labels_s.to(device)
    gt_labels_t = gt_labels_t.to(device)
    sample_labels_t = sample_labels_t.to(device)
    sample_labels_s = sample_labels_s.to(device)
    im_data_s = Variable(im_data_s)
    im_data_t = Variable(im_data_t)
    im_data_tu = Variable(im_data_tu)
    gt_labels_s = Variable(gt_labels_s)
    gt_labels_t = Variable(gt_labels_t)
    sample_labels_t = Variable(sample_labels_t)
    sample_labels_s = Variable(sample_labels_s)
    if os.path.exists(args.checkpath) == False:
        os.mkdir(args.checkpath)

    def train():
        """Stage-2 training loop: supervised loss + weighted entropy term."""
        G.train()
        F1.train()
        optimizer_g = optim.SGD(params,
                                momentum=0.9,
                                weight_decay=0.0005,
                                nesterov=True)
        optimizer_f = optim.SGD(list(F1.parameters()),
                                lr=1.0,
                                momentum=0.9,
                                weight_decay=0.0005,
                                nesterov=True)
        # Loading the states of the two optmizers
        optimizer_g.load_state_dict(main_dict['optimizer_g'])
        optimizer_f.load_state_dict(main_dict['optimizer_f'])
        print("Loaded optimizer states")

        def zero_grad_all():
            optimizer_g.zero_grad()
            optimizer_f.zero_grad()

        # Remember each group's base LR so the scheduler can rescale from it.
        param_lr_g = []
        for param_group in optimizer_g.param_groups:
            param_lr_g.append(param_group["lr"])
        param_lr_f = []
        for param_group in optimizer_f.param_groups:
            param_lr_f.append(param_group["lr"])
        # Setting the loss function to be used for the classification loss.
        # NOTE(review): gamma/beta are hard-coded here, unlike the stage-1
        # script which reads them from args — confirm intended.
        if args.loss == 'CE':
            criterion = nn.CrossEntropyLoss().to(device)
        if args.loss == 'FL':
            criterion = FocalLoss(alpha=1, gamma=1).to(device)
        if args.loss == 'CBFL':
            # Calculating the list having the number of examples per class
            # which is going to be used in the CB focal loss
            beta = 0.99
            effective_num = 1.0 - np.power(beta, class_num_list)
            per_cls_weights = (1.0 - beta) / np.array(effective_num)
            per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(
                class_num_list)
            per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
            criterion = CBFocalLoss(weight=per_cls_weights,
                                    gamma=0.5).to(device)
        all_step = args.steps
        data_iter_s = iter(source_loader)
        data_iter_t = iter(target_loader)
        data_iter_t_unl = iter(target_loader_unl)
        len_train_source = len(source_loader)
        len_train_target = len(target_loader)
        len_train_target_semi = len(target_loader_unl)
        best_acc = 0
        counter = 0
        for step in range(all_step):
            optimizer_g = inv_lr_scheduler(param_lr_g, optimizer_g, step,
                                           init_lr=args.lr)
            optimizer_f = inv_lr_scheduler(param_lr_f, optimizer_f, step,
                                           init_lr=args.lr)
            lr = optimizer_f.param_groups[0]['lr']
            # condition for restarting the iteration for each of the data
            # loaders
            if step % len_train_target == 0:
                data_iter_t = iter(target_loader)
            if step % len_train_target_semi == 0:
                data_iter_t_unl = iter(target_loader_unl)
            if step % len_train_source == 0:
                data_iter_s = iter(source_loader)
            data_t = next(data_iter_t)
            data_t_unl = next(data_iter_t_unl)
            data_s = next(data_iter_s)
            # Copy the batch into pre-allocated tensors without tracking grads.
            with torch.no_grad():
                im_data_s.resize_(data_s[0].size()).copy_(data_s[0])
                gt_labels_s.resize_(data_s[1].size()).copy_(data_s[1])
                im_data_t.resize_(data_t[0].size()).copy_(data_t[0])
                gt_labels_t.resize_(data_t[1].size()).copy_(data_t[1])
                im_data_tu.resize_(data_t_unl[0].size()).copy_(data_t_unl[0])
            zero_grad_all()
            data = torch.cat((im_data_s, im_data_t), 0)
            target = torch.cat((gt_labels_s, gt_labels_t), 0)
            output = G(data)
            out1 = F1(output)
            loss = criterion(out1, target)
            loss.backward(retain_graph=True)
            optimizer_g.step()
            optimizer_f.step()
            zero_grad_all()
            # list of the weights and image paths in this batch; assumes
            # data_t_unl[2] carries the image paths — TODO confirm loader.
            img_paths = list(data_t_unl[2])
            df1 = df.loc[df['img'].isin(img_paths)]
            df1 = df1['weight']
            weight_list = list(df1)
            if not args.method == 'S+T':
                output = G(im_data_tu)
                if args.method == 'ENT':
                    loss_t = entropy(F1, output, args.lamda)
                    loss_t.backward()
                    optimizer_f.step()
                    optimizer_g.step()
                elif args.method == 'MME':
                    # Per-image weights modulate the adversarial entropy term.
                    loss_t = adentropy(F1, output, args.lamda, weight_list)
                    loss_t.backward()
                    optimizer_f.step()
                    optimizer_g.step()
                else:
                    raise ValueError('Method cannot be recognized.')
                log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                            'Loss Classification: {:.6f} Loss T {:.6f} ' \
                            'Method {}\n'.format(args.source, args.target,
                                                 step, lr, loss.data,
                                                 -loss_t.data, args.method)
            else:
                log_train = 'S {} T {} Train Ep: {} lr{} \t ' \
                            'Loss Classification: {:.6f} Method {}\n'.\
                    format(args.source, args.target, step, lr, loss.data,
                           args.method)
            G.zero_grad()
            F1.zero_grad()
            zero_grad_all()
            if step % args.log_interval == 0:
                print(log_train)
            if step % args.save_interval == 0 and step > 0:
                loss_val, acc_val = test(target_loader_val)
                loss_test, acc_test = test(target_loader_test)
                G.train()
                F1.train()
                # NOTE(review): model selection here uses TEST accuracy
                # (stage-1 uses validation accuracy) — test-set leakage;
                # confirm this is intentional.
                if acc_test >= best_acc:
                    best_acc = acc_test
                    best_acc_test = acc_test
                    counter = 0
                else:
                    counter += 1
                if args.early:
                    if counter > args.patience:
                        break
                print('best acc test %f best acc val %f' % (best_acc_test,
                                                            acc_val))
                print('record %s' % record_file)
                with open(record_file, 'a') as f:
                    f.write('step %d best %f final %f \n' %
                            (step, best_acc_test, acc_val))
                G.train()
                F1.train()
                #saving model as a checkpoint dict having many things
                if args.save_check:
                    print('saving model')
                    is_best = True if counter == 0 else False
                    save_mymodel(
                        args, {
                            'step': step,
                            'arch': args.net,
                            'G_state_dict': G.state_dict(),
                            'F1_state_dict': F1.state_dict(),
                            'best_acc_test': best_acc_test,
                            'optimizer_g': optimizer_g.state_dict(),
                            'optimizer_f': optimizer_f.state_dict(),
                        }, is_best)

    # defining the function for in training validation and testing
    def test(loader):
        """Evaluate G+F1 on `loader`; returns (avg loss, accuracy %).

        Side effect: saves the confusion matrix to cf_target.npy.
        """
        G.eval()
        F1.eval()
        test_loss = 0
        correct = 0
        size = 0
        num_class = len(class_list)
        output_all = np.zeros((0, num_class))
        # Setting the loss function to be used for the classification loss
        if args.loss == 'CE':
            criterion = nn.CrossEntropyLoss().to(device)
        if args.loss == 'FL':
            criterion = FocalLoss(alpha=1, gamma=1).to(device)
        if args.loss == 'CBFL':
            # Calculating the list having the number of examples per class
            # which is going to be used in the CB focal loss
            beta = 0.99
            effective_num = 1.0 - np.power(beta, class_num_list)
            per_cls_weights = (1.0 - beta) / np.array(effective_num)
            per_cls_weights = per_cls_weights / np.sum(per_cls_weights) * len(
                class_num_list)
            per_cls_weights = torch.FloatTensor(per_cls_weights).to(device)
            criterion = CBFocalLoss(weight=per_cls_weights,
                                    gamma=0.5).to(device)
        confusion_matrix = torch.zeros(num_class, num_class)
        with torch.no_grad():
            for batch_idx, data_t in enumerate(loader):
                im_data_t.data.resize_(data_t[0].size()).copy_(data_t[0])
                gt_labels_t.data.resize_(data_t[1].size()).copy_(data_t[1])
                feat = G(im_data_t)
                output1 = F1(feat)
                output_all = np.r_[output_all, output1.data.cpu().numpy()]
                size += im_data_t.size(0)
                pred1 = output1.data.max(1)[1]
                for t, p in zip(gt_labels_t.view(-1), pred1.view(-1)):
                    confusion_matrix[t.long(), p.long()] += 1
                correct += pred1.eq(gt_labels_t.data).cpu().sum()
                test_loss += criterion(output1, gt_labels_t) / len(loader)
        np.save("cf_target.npy", confusion_matrix)
        #print(confusion_matrix)
        print('\nTest set: Average loss: {:.4f}, '
              'Accuracy: {}/{} F1 ({:.0f}%)\n'.format(
                  test_loss, correct, size, 100. * correct / size))
        return test_loss.data, 100. * float(correct) / size

    print("Starting stage 2 training of the model ...")
    train()