コード例 #1
0
    def __init__(self, out_planes, is_training):
        """Create the backbone, refinement convolutions and upsampling layers.

        Args:
            out_planes: Output channel count of every refinement and
                transposed-convolution layer built here (presumably the
                number of classes -- confirm against the caller).
            is_training: Flag stored unchanged on ``self.is_training``.
        """
        super(Network_UNet, self).__init__()
        # Plain Python list of the sub-modules built below, in append order.
        self.layers = []
        self.is_training = is_training

        # base model of resnet 18 from resnet.py (pretrained_model=None is passed)
        self.resnet = resnet18(pretrained_model=None,
                               norm_layer=BN2D,
                               bn_eps=config.bn_eps,
                               bn_momentum=config.bn_momentum,
                               deep_stem=False,
                               stem_width=64)
        # tail refinement on FCN style contraction path:
        # 7x7 conv (512 -> 1024), 1x1 conv (1024 -> out_planes), each followed
        # by 2D dropout, then adaptive average pooling to a 1x1 map.
        self.refine_512 = nn.Sequential(
            ConvBnRelu(512, 1024, 7, 1, 3, has_bn=False, has_relu=True, has_bias=False, norm_layer=BN2D),
            nn.Dropout2d(),
            ConvBnRelu(1024, out_planes, 1, 1, 0, has_bn=False, has_relu=True, has_bias=False, norm_layer=BN2D),
            nn.Dropout2d(),
            nn.AdaptiveAvgPool2d(1)
        )

        # upscale using Transpose convolution
        self.up_512 = nn.ConvTranspose2d(out_planes, out_planes, kernel_size=4, stride=2, padding=2, output_padding=1)
        # Refinement on intermediate layers in FCN style structure:
        # 1x1 convs (no BN, no ReLU, no bias) mapping 256/128/64 channels
        # to out_planes.
        self.refine_256 = ConvBnRelu(256, out_planes, 1, 1, 0, has_bn=False, has_relu=False, has_bias=False, norm_layer=BN2D)
        self.refine_128 = ConvBnRelu(128, out_planes, 1, 1, 0, has_bn=False, has_relu=False, has_bias=False, norm_layer=BN2D)
        self.refine_64 = ConvBnRelu(64, out_planes, 1, 1, 0, has_bn=False, has_relu=False, has_bias=False, norm_layer=BN2D)

        # upscale using Transpose convolution (stride 2)
        self.up_256 = nn.ConvTranspose2d(out_planes, out_planes, kernel_size=4, stride=2, padding=1)
        self.up_128 = nn.ConvTranspose2d(out_planes, out_planes, kernel_size=4, stride=2, padding=1)

        # final upscale using Transpose convolution (stride 4)
        self.up_final = nn.ConvTranspose2d(out_planes, out_planes, kernel_size=8, stride=4, padding=2)

        # Record every sub-module; the order below is the original append order.
        self.layers.append(self.resnet)
        self.layers.append(self.refine_512)
        self.layers.append(self.refine_256)
        self.layers.append(self.refine_128)
        self.layers.append(self.refine_64)
        self.layers.append(self.up_512)
        self.layers.append(self.up_256)
        self.layers.append(self.up_128)
        self.layers.append(self.up_final)

        # Mean-reduced cross entropy; target value 255 is passed as ignore_index.
        self.loss = nn.CrossEntropyLoss(reduction='mean', ignore_index=255)
コード例 #2
0
ファイル: network.py プロジェクト: hsiung30576/ufs_seg
    def __init__(self, out_planes, is_training, criterion,
                 pretrained_model=None, norm_layer=nn.BatchNorm2d):
        """Assemble the encoder, fusion, confidence and output sub-modules.

        Args:
            out_planes: Output channel count handed to each ``RefineOutput``.
            is_training: Stored on ``self.is_training``; when truthy,
                ``criterion`` is stored as well.
            criterion: Loss object kept on ``self.criterion`` only when
                ``is_training`` is truthy.
            pretrained_model: Forwarded as the first argument of ``resnet18``.
            norm_layer: Normalisation layer class forwarded to ``resnet18``
                and to the ``ConvBnRelu`` built here.
        """
        super(conf, self).__init__()

        self.is_training = is_training
        # Plain list of the non-encoder sub-modules, filled at the end.
        self.business_layer = []
        if is_training:
            self.criterion = criterion

        encoder_opts = dict(norm_layer=norm_layer,
                            bn_eps=config.bn_eps,
                            bn_momentum=config.bn_momentum,
                            deep_stem=False,
                            stem_width=64)
        self.encoder = resnet18(pretrained_model, **encoder_opts)

        mid_channels = 128
        self.context_ff = AttentionFusion(256, 512, mid_channels)
        # 1x1 conv, stride 1, no padding: 64 -> 128 channels with BN and ReLU.
        self.spatial_conv = ConvBnRelu(
            64, mid_channels, 1, 1, 0,
            dilation=1,
            has_bn=True,
            norm_layer=norm_layer,
            has_relu=True,
            has_bias=False,
        )
        self.loc_conf = LocationConfidence(mid_channels + mid_channels, 1)

        self.refine_block = RefineOutput(mid_channels, out_planes, 4)
        self.spatial_refine_block = RefineOutput(mid_channels, out_planes, 4)
        self.context_refine_block = RefineOutput(mid_channels, out_planes, 16)

        # The encoder is intentionally not part of this list.
        self.business_layer.extend((
            self.context_ff,
            self.spatial_conv,
            self.loc_conf,
            self.refine_block,
            self.spatial_refine_block,
            self.context_refine_block,
        ))
コード例 #3
0
    parser.add_argument('--epoch', type=int, default=20)
    parser.add_argument('--learning_rate', type=float, default=0.001)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=5e-4)
    parser.add_argument('--stage', type=int, default=1)
    parser.add_argument('--evaluate', type=int, default=0)
    args = parser.parse_args()

    trainset = AudioVisualData()
    testset = TestData()
    trainloader = data.DataLoader(trainset, batch_size=args.train_batch, shuffle=True, 
                                  collate_fn=DataAllocate, num_workers=args.num_workers)
    testloader = data.DataLoader(testset, batch_size=args.val_batch, shuffle=False, 
                                  collate_fn=TestAllocate, num_workers=args.num_workers)
    
    vision_net = resnet18(modal='vision', pretrained=True)
    audio_net = resnet18(modal='audio')
    
    if args.evaluate:
        net = Location(vision_net, audio_net).cuda()
        net.load_state_dict(torch.load(args.path))
        test(net, testloader)
        exit()
    
    net = MTask(vision_net, audio_net).cuda() if args.stage == 1 \
        else Align(vision_net, audio_net).cuda()
    if args.pretrained:
        net.load_state_dict(torch.load(args.path), strict=False)
    
    params = list(net.parameters())
    optimizer = torch.optim.SGD(params=params, lr=args.learning_rate, 
コード例 #4
0
    def __init__(self, out_planes, is_training, criterion,
                 pretrained_model=None, norm_layer=nn.BatchNorm2d):
        """Build the context path, spatial path, attention and head modules.

        Args:
            out_planes: Output channel count handed to each ``BiSeNetHead``.
            is_training: Stored on ``self.is_training``; when truthy,
                ``criterion`` is stored as well.
            criterion: Loss object kept on ``self.criterion`` only when
                ``is_training`` is truthy.
            pretrained_model: Forwarded as the first argument of ``resnet18``.
            norm_layer: Normalisation layer class forwarded to every
                sub-module built here.
        """
        super(BiSeNet, self).__init__()
        self.context_path = resnet18(
            pretrained_model,
            norm_layer=norm_layer,
            bn_eps=config.bn_eps,
            bn_momentum=config.bn_momentum,
            deep_stem=False,
            stem_width=64,
        )

        # Plain list of the non-backbone sub-modules, filled near the end.
        self.business_layer = []
        self.is_training = is_training

        self.spatial_path = SpatialPath(3, 128, norm_layer)

        width = 128

        def conv_block(in_channels, ksize, pad):
            # Stride-1 ConvBnRelu producing `width` channels, BN + ReLU, no bias.
            return ConvBnRelu(in_channels, width, ksize, 1, pad,
                              has_bn=True,
                              norm_layer=norm_layer,
                              has_relu=True,
                              has_bias=False)

        # Adaptive average pooling to 1x1 followed by a 1x1 conv (512 -> width).
        self.global_context = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            conv_block(512, 1, 0),
        )

        # stage = [512, 256, 128, 64]
        arm_modules = [
            AttentionRefinement(in_channels, width, norm_layer)
            for in_channels in (512, 256)
        ]
        # One 3x3 refinement conv per attention module.
        refine_modules = [conv_block(width, 3, 1) for _ in range(2)]

        head_specs = (
            (width, 16, True),
            (width, 8, True),
            (width * 2, 8, False),
        )
        head_modules = [
            BiSeNetHead(in_channels, out_planes, scale, flag, norm_layer)
            for in_channels, scale, flag in head_specs
        ]

        self.ffm = FeatureFusion(width * 2, width * 2, 1, norm_layer)

        # Wrap the lists so their parameters are registered on this module.
        self.arms = nn.ModuleList(arm_modules)
        self.refines = nn.ModuleList(refine_modules)
        self.heads = nn.ModuleList(head_modules)

        # The context path (backbone) is intentionally not part of this list.
        self.business_layer.extend((
            self.spatial_path,
            self.global_context,
            self.arms,
            self.refines,
            self.heads,
            self.ffm,
        ))

        if is_training:
            self.criterion = criterion
コード例 #5
0
    def __init__(self, out_planes, is_training, BN2D=BatchNorm2d):
        """Build the ResNet-18 context network, attention and classifier parts.

        Args:
            out_planes: Output channel count of the final 1x1 classifier conv.
            is_training: Flag stored unchanged on ``self.is_training``.
            BN2D: Normalisation layer class forwarded to every sub-module
                built here.
        """
        super(Network_Res18, self).__init__()
        # Plain list of sub-modules, filled near the end of construction.
        self.layers = []
        self.is_training = is_training

        width = 128
        half_width = width // 2
        # Options shared by every ConvBnRelu created in this constructor.
        cbr_opts = dict(has_bn=True, norm_layer=BN2D, has_relu=True,
                        has_bias=False)

        # use base model of resnet 18 from resnet.py
        self.context = resnet18(
            pretrained_model=None,
            norm_layer=BN2D,
            bn_eps=config.bn_eps,
            bn_momentum=config.bn_momentum,
            deep_stem=False,
            stem_width=64,
        )
        # Adaptive average pooling to 1x1 followed by a 1x1 conv (512 -> width).
        self.context_refine = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            ConvBnRelu(512, width, 1, 1, 0, **cbr_opts),
        )

        # ARM for ResBlock 2,3,4 of resnet output
        arm_modules = [
            AttentionRefinement(in_channels, width, norm_layer=BN2D)
            for in_channels in (512, 256, 128)
        ]

        # Refinement of corresponding output: one 3x3 conv per ARM
        refine_modules = [
            ConvBnRelu(width, width, 3, 1, 1, **cbr_opts)
            for _ in range(3)
        ]

        # Wrap the lists so their parameters are registered on this module.
        self.arms = nn.ModuleList(arm_modules)
        self.refines = nn.ModuleList(refine_modules)

        # Refinement on first layer of resnet output: 3x3 conv, 64 -> width // 2
        self.res_top_refine = ConvBnRelu(64, half_width, 3, 1, 1, **cbr_opts)

        self.ffm = FeatureFusion(192, width, 1, BN2D)
        # classifier for final output: 3x3 conv then 1x1 conv to out_planes
        self.class_refine = nn.Sequential(
            ConvBnRelu(width, half_width, 3, 1, 1, **cbr_opts),
            nn.Conv2d(half_width, out_planes,
                      kernel_size=1, stride=1, padding=0),
        )

        # Order below is the original append order, not the creation order.
        self.layers.extend((
            self.context,
            self.class_refine,
            self.context_refine,
            self.arms,
            self.ffm,
            self.refines,
            self.res_top_refine,
        ))
        # Mean-reduced cross entropy; target value 255 is passed as ignore_index.
        self.loss = nn.CrossEntropyLoss(reduction='mean', ignore_index=255)