Example #1
 def __init__(self):
     super(PartialGen, self).__init__()
     self.first = PartialConv2d(3,
                                32,
                                1,
                                1,
                                multi_channel=True,
                                return_mask=True)
     self.g1 = GenBlock(32)
     self.c1 = Conv2d(32, 32, 3, 1, 1)
     self.c2 = Conv2d(32, 3, 3, 1, 1)
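For context, PartialConv2d with return_mask=True consumes both an image and a validity mask and returns the features together with an updated mask, which is what lets later layers keep masking holes. A minimal usage sketch, assuming the NVIDIA partialconv reference implementation of PartialConv2d:

import torch

# Hedged sketch: PartialConv2d is assumed to follow the NVIDIA partialconv
# reference API, i.e. forward(input, mask_in) -> (output, updated_mask).
pconv = PartialConv2d(3, 32, 1, 1, multi_channel=True, return_mask=True)
x = torch.randn(1, 3, 64, 64)
mask = torch.ones(1, 3, 64, 64)   # 1 = valid pixel, 0 = hole
out, new_mask = pconv(x, mask)    # features and the updated mask propagate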
Example #2
    def test_conv_with_backward_2kernels(self):
        conv = """
        def convolution(float(N,C,H,W) I, float(M,C,KH,KW) W1, float(M) Bias)
        -> (O)
        {
            O(n, m, h, w) +=!
                I(n, r_c, h + r_kh, w + r_kw) * W1(m, r_c, r_kh, r_kw)
            O(n, m, h, w)  = O(n, m, h, w) + Bias(m)
        }
        def convolution_igrad(float(M,C,KH,KW) W1, float(N,M,H,W) d_O)
            -> (d_I)
        {
            d_I(n, c, h, w) +=!
                d_O(  n, r_m, h - r_kh, w - r_kw) * W1(r_m, c, r_kh, r_kw)
        }
        def convolution_wgrad(float(N,C,H,W) I, float(N,M,H,W) d_O) -> (d_W1)
        {
            d_W1(m, c, kh, kw) +=!
                d_O(r_n,   m, r_h - kh, r_w - kw) *  I(r_n, c,  r_h,  r_w)
        }
        def convolution_biasgrad(float(M) Bias) -> (d_Bias)
        {
            # TODO: Bias incorrect + check
            d_Bias(m) = Bias(m)
        }
        """

        N, C, H, W, O, kH, kW = 32, 4, 56, 56, 16, 1, 1
        T = tc.define(
            conv,
            tc.make_autotuned_options_factory(starting_options='naive',
                                              tuner_config=tuner_config))
        I = torch.randn(N, C, H, W, device='cuda', requires_grad=True)

        # Reference
        from torch.nn.modules.conv import Conv2d
        Conv = Conv2d(C, O, 1, stride=1).cuda()
        ref = Conv.forward(I)

        W = Conv.weight.clone()
        Bias = Conv.bias.clone()

        def convolution_backward(I, W, Bias, d_O):
            d_I = T.convolution_igrad(W, d_O)
            d_W1 = T.convolution_wgrad(I, d_O)
            d_Bias = T.convolution_biasgrad(Bias)
            return (d_I, d_W1, d_Bias)

        convolution_function = tc.make_autograd(T.convolution,
                                                convolution_backward)

        # First occurrence triggers tuning
        out = convolution_function(I, W, Bias)
        out.sum().backward()

        # Subsequent occurrences do not
        out = convolution_function(I, W, Bias)
        out.sum().backward()

        tc.assert_almost_equal(ref, out, I, operations=C * kH * kW)
Example #3
    def __init__(self,
                 img_size=(args.im_s, args.im_s),
                 patch_size=(args.ps, args.ps),
                 emb_dim=args.emb_dim,
                 mlp_dim=args.mlp_dim,
                 num_heads=args.num_heads,
                 num_layers=args.num_layers,
                 n_classes=2,
                 dropout_rate=0.,
                 at_d_r=args.at_d_r):
        super(VIT, self).__init__()

        self.nl = num_layers
        ih, iw = img_size
        ph, pw = patch_size
        num_patches = int((ih * iw) / (ph * pw))
        self.cls_tokens = nn.Parameter(torch.rand(1, 1, emb_dim))
        self.patch_embed = Conv2d(in_channels=3,
                                  out_channels=emb_dim,
                                  kernel_size=patch_size,
                                  stride=patch_size)
        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, emb_dim))
        self.dropout = nn.Dropout(dropout_rate)

        self.tel = nn.ModuleList()
        for i in range(num_layers):
            layer = transencoder(emb_dim, mlp_dim, num_heads, at_d_r)
            self.tel.append(layer)

        self.mlp_head = nn.Sequential(nn.LayerNorm(emb_dim),
                                      nn.Linear(emb_dim, n_classes))
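The patch embedding above works because the convolution's kernel size equals its stride, so each output location covers exactly one non-overlapping patch; flattening the spatial grid then yields num_patches tokens. A self-contained shape check with illustrative sizes (the real values come from `args`, which is not shown here):

import torch
import torch.nn as nn

img, patch, emb = 224, 16, 768                      # illustrative sizes
x = torch.randn(1, 3, img, img)                     # one RGB image
patch_embed = nn.Conv2d(3, emb, kernel_size=patch, stride=patch)
tokens = patch_embed(x)                             # (1, emb, img/patch, img/patch)
tokens = tokens.flatten(2).transpose(1, 2)          # (1, num_patches, emb)
assert tokens.shape == (1, (img // patch) ** 2, emb)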
Example #4
 def _init_layers(self):
     padding = self.filter_size // 2
     self.conv1 = Conv2d(self.input_channels,
                         self.num_filters,
                         kernel_size=self.filter_size,
                         padding=padding)
     self.relu1 = ReLU()
     self.conv2 = Conv2d(self.num_filters,
                         self.num_filters,
                         kernel_size=self.filter_size,
                         padding=padding)
     self.relu2 = ReLU()
     self.conv3 = Conv2d(self.num_filters,
                         1,
                         kernel_size=self.filter_size,
                         padding=padding)
     self.softmax = Softmax(dim=0)
Example #5
    def __init__(self, batch_norm=True):
        super().__init__()
        self.batch_norm = batch_norm
        self.c1 = Conv2d(3, 64, 5, 2, padding=2)
        if self.batch_norm:
            self.bn1 = BatchNorm2d(64)
        self.c2 = Conv2d(64, 128, 3, 2, padding=1)
        if self.batch_norm:
            self.bn2 = BatchNorm2d(128)
        self.c3 = Conv2d(128, 128, 3, 1, padding=1)
        if self.batch_norm:
            self.bn3 = BatchNorm2d(128)

        self.up4 = Upsample(scale_factor=2)
        self.c4 = Conv2d(128 + 64, 64, 3, 1, padding=1)
        self.up5 = Upsample(scale_factor=2)
        self.c5 = Conv2d(64 + 3, 3, 3, 1, padding=1)
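The `128 + 64` and `64 + 3` input widths of `c4` and `c5` imply that the forward pass (not shown) concatenates each upsampled map with an earlier feature map or the raw input, U-Net style. A plausible sketch of that wiring, under that assumption:

import torch

def forward_sketch(self, x):
    # assumed wiring; batch-norm layers omitted for brevity
    h1 = torch.relu(self.c1(x))     # (N, 64, H/2, W/2)
    h2 = torch.relu(self.c2(h1))    # (N, 128, H/4, W/4)
    h3 = torch.relu(self.c3(h2))    # (N, 128, H/4, W/4)
    u4 = self.up4(h3)               # back up to H/2 x W/2
    h4 = torch.relu(self.c4(torch.cat([u4, h1], dim=1)))  # 128 + 64 in-channels
    u5 = self.up5(h4)               # back up to H x W
    return self.c5(torch.cat([u5, x], dim=1))             # 64 + 3 in-channels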
Example #6
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        *args,
        hidden_kernel: Union[int, Tuple[int, int]],
        **kwargs,
        # XXX star-args are passed directly to Conv2d for the `input`
    ):
        # make sure the kernel sizes are odd, and get the padding
        # XXX the only place where the `2d-ness` is hardcoded here!
        n_kernel = _pair(hidden_kernel)
        assert all(k & 1 for k in n_kernel)

        n_pad = [k >> 1 for k in n_kernel]  # stride == 1

        super().__init__()

        # input to reset and update gates, and the candidate state
        self.x_hrz = Conv2d(
            in_channels,
            3 * out_channels,
            *args,
            **kwargs,
        )

        # hidden state to reset and update gates
        self.h_rz = Conv2d(
            out_channels,
            2 * out_channels,
            n_kernel,
            stride=1,
            bias=False,
            padding=n_pad,
        )

        # hidden state to the candidate
        self.h_h = Conv2d(
            out_channels,
            out_channels,
            n_kernel,
            stride=1,
            bias=False,
            padding=n_pad,
        )
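The odd-kernel assertion above exists because, at stride 1, padding `k >> 1` (i.e. `k // 2`) is exactly the "same" padding that preserves H and W, so the hidden-state convolutions can feed back into a state of the same spatial size. A quick check:

import torch
import torch.nn as nn

k = 5
assert k & 1                        # odd kernel, as the assertion requires
conv = nn.Conv2d(8, 8, k, stride=1, padding=k >> 1)
x = torch.randn(1, 8, 32, 32)
assert conv(x).shape == x.shape     # spatial size is preserved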
Example #7
    def __init__(self):
        super().__init__()
        self.first = PartialConv2d(3,
                                   32,
                                   3,
                                   padding=1,
                                   multi_channel=True,
                                   return_mask=True)
        self.r1 = PartialResBlock(32)
        self.r2 = PartialResBlock(32)
        self.r3 = PartialResBlock(32)

        self.c1 = Conv2d(32, 32 * 4, 3, 2, 1)
        self.up1 = PixelShuffle(2)
        self.c2 = Conv2d(32, 16, 3, 1, 1)

        self.c3 = Conv2d(16, 16 * 4, 3, 2, 1)
        self.up2 = PixelShuffle(2)
        self.c4 = Conv2d(16, 3, 3, 1, 1)
Example #8
def make_conv_layers(cfg):
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M_2':
            layers += [Downsample(kernel_size=2)]
        elif v == 'M_4':
            layers += [Downsample(kernel_size=4)]
        else:
            conv = Conv2d(in_channels, v, kernel_size=3, padding=1)
            layers += [conv, ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)
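Usage is VGG-style: integers become 3x3 Conv2d+ReLU pairs and the 'M_2'/'M_4' markers insert Downsample layers. A hypothetical call (the cfg list here is made up for illustration, and the module's own Conv2d, ReLU, and Downsample imports are assumed):

# Hypothetical cfg: conv output channels interleaved with downsampling markers.
features = make_conv_layers([64, 64, 'M_2', 128, 128, 'M_2', 256, 'M_4'])
print(features)   # nn.Sequential of Conv2d / ReLU / Downsample layers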
Example #9
def acl_vgg(data, stateful):
    dcn = dcn_vgg()
    att_module = nn.Sequential(
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        # MaxPool2d halves H and W but keeps the channel count, so the 512
        # channels coming out of dcn_vgg are still 512 channels here.
        Conv2d(512, 64, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), padding=0),
        ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        Conv2d(128, 64, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(128, 1, kernel_size=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest'))

    outs = TimeDistributed(dcn)(data)

    attention = TimeDistributed(att_module)(outs)

    # Keras-style Flatten / RepeatVector / Permute / Reshape sequence,
    # written as plain tensor ops on the attention maps
    f_attention = attention.view(attention.size(0), -1)         # flatten
    f_attention = f_attention.unsqueeze(1).expand(-1, 512, -1)  # repeat vector
    f_attention = f_attention.transpose(1, 2)                   # permute
    f_attention = f_attention.reshape((32, 40, 512))
    m_outs = outs * f_attention  # element-wise multiplication
    outs = outs + m_outs

    ### This needs to change
    clstm = ConvLSTMCell(use_gpu=False, input_size=512, hidden_size=256,
                         kernel_size=(3, 3))
    outs = clstm(outs)
    ###

    produce_smaps = nn.Sequential(
        # InputDimensions will be figured out after changing the ConvLSTM
        Conv2d(InputDimensions, 1, kernel_size=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest'))

    outs = TimeDistributed(produce_smaps)(outs)
    attention = TimeDistributed(Upsample(scale_factor=2,
                                         mode='nearest'))(attention)
    return [outs, outs, outs, attention, attention, attention]
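The channel bookkeeping in att_module is easy to get wrong: MaxPool2d halves H and W but never changes the number of channels, so the first conv after pooling must accept all 512 channels from dcn_vgg. A two-line check:

import torch
import torch.nn as nn

x = torch.randn(1, 512, 40, 32)
y = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))(x)
assert y.shape == (1, 512, 20, 16)   # spatial dims halved, still 512 channels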
Example #10
 def __init__(self):
     super().__init__()
     self.first = PartialConv2d(3,
                                32,
                                1,
                                1,
                                multi_channel=True,
                                return_mask=True)
     self.c1 = PartialConv2d(32,
                             32,
                             3,
                             1,
                             padding=1,
                             multi_channel=True,
                             return_mask=True)
     self.c2 = PartialConv2d(32,
                             32,
                             3,
                             1,
                             padding=1,
                             multi_channel=True,
                             return_mask=True)
     self.c3 = PartialConv2d(32,
                             32,
                             3,
                             1,
                             padding=1,
                             multi_channel=True,
                             return_mask=True)
     self.c4 = PartialConv2d(32,
                             32,
                             3,
                             1,
                             padding=1,
                             multi_channel=True,
                             return_mask=True)
     self.last = Conv2d(32, 3, 3, 1, 1)
Example #11
    def __init__(self):

        super(Discriminator, self).__init__()
        self.conv1_1 = Conv2d(4,
                              3,
                              kernel_size=(1, 1),
                              stride=(1, 1),
                              padding=(1, 1))
        self.conv1_2 = Conv2d(3,
                              32,
                              kernel_size=(3, 3),
                              stride=(1, 1),
                              padding=(1, 1))

        self.conv2_1 = Conv2d(32,
                              64,
                              kernel_size=(3, 3),
                              stride=(1, 1),
                              padding=(1, 1))
        self.conv2_2 = Conv2d(64,
                              64,
                              kernel_size=(3, 3),
                              stride=(1, 1),
                              padding=(1, 1))

        self.conv3_1 = Conv2d(64,
                              64,
                              kernel_size=(3, 3),
                              stride=(1, 1),
                              padding=(1, 1))
        self.conv3_2 = Conv2d(64,
                              64,
                              kernel_size=(3, 3),
                              stride=(1, 1),
                              padding=(1, 1))

        self.fc4 = Linear(49152, 100)
        self.fc5 = Linear(100, 2)
        self.fc6 = Linear(2, 1)

        self.pool = MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.tanh = Tanh()
        self.sigmoid = Sigmoid()
Example #12
    def __init__(self):
        super(Sal_based_Attention_module, self).__init__()

        # Create the encoder based on the VGG16 architecture used in SalGAN and apply the aforementioned changes
        Based_Attention_Module = based_AM

        # select only the first 5 conv blocks; here we keep VGG's 212*212 receptive field
        # each neuron on the bottleneck sees just a (244,244) viewport while sliding,
        # input (640,320), bottleneck feature map 40*20*512, excluding SalGAN's last maxpooling
        # feature map on the AM bottleneck: 10*5*128
        # the attention module's receptive field is enlarged to (676,676)
        self.encoder = torch.nn.Sequential(*Based_Attention_Module)
        self.attention_module = torch.nn.Sequential(*[
            Downsample(kernel_size=2),
            Conv2d(512, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Downsample(kernel_size=2),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
            Upsample(scale_factor=4, mode='nearest')
        ])

        #self.reshape = Reshape(-1,512,40,20)

        # define decoder based on VGG16 (inverse order and Upsampling layers, nearest mode)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        self.decoder = torch.nn.Sequential(*decoder_list)

        print("Model initialized, Sal_based_Attention_module")
Example #13

    def __init__(self, pretrained=True):
        super(Decoder, self).__init__()

        decoder_list=[
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=4, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=4, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        self.decoder = torch.nn.Sequential(*decoder_list)
        self._initialize_weights()
        print("decoder initialized")
        print("architecture len :",str(len(self.Autoencoder))) 
Example #14

 def __init__(self, pretrained=True):
     super(Encoder, self).__init__()
     # Create encoder based on VGG16 architecture
     # Change just the 4th and 5th maxpooling layers to scale 4 instead of 2
     # select only the convolutional layers of the first 5 conv blocks; changing the maxpooling enlarges the receptive field
     # each neuron on the bottleneck sees (580,580), i.e. all viewports,
     # input (576,288), bottleneck feature map (9*4)*512, excluding the last maxpooling
     encoder_list = [
         Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=0),
         ReLU(),
         Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),
         Downsample(kernel_size=3),
         Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),
         Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),            
         Downsample(kernel_size=3),
         Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),
         Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),              
         Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(), 
         Downsample(kernel_size=3, stride=4),
         Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),
         Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),              
         Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),
         Downsample(kernel_size=3, stride=4),
         Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),
         Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),              
         Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
         ReLU(),                           
     ]
     self.encoder = torch.nn.Sequential(*encoder_list)
     print("encoder initialized")
     print("architecture len :", str(len(self.encoder)))
Example #15
    def __init__(self, seed_init, freeze=True, use_gpu=True):
        super(SalCLSTM56, self).__init__()

        self.use_gpu = use_gpu

        # Create encoder based on VGG16 architecture
        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            # During the Upsampling operations we may lose 1 pixel along a dimension that was odd before downsampling
        ]

        decoder = torch.nn.Sequential(*decoder_list)
        # assemble the full encoder-decoder architecture
        self.salgan = torch.nn.Sequential(*(list(encoder.children()) +
                                            list(decoder.children())))
        #print(self.salgan)
        # ConvLSTM
        self.input_size = 128
        self.hidden_size = 128
        self.Gates = nn.Conv2d(
            in_channels=self.input_size + self.hidden_size,
            out_channels=4 * self.hidden_size,
            kernel_size=(3, 3),
            padding=1)  #padding 1 to preserve HxW dimensions

        final_convolutions = [
            Conv2d(self.hidden_size,
                   64,
                   kernel_size=(3, 3),
                   stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]
        self.final_convs = torch.nn.Sequential(*final_convolutions)

        # Initialize weights of ConvLSTM

        torch.manual_seed(seed_init)
        for name, param in self.Gates.named_parameters():
            if "weight" in name:
                nn.init.xavier_normal_(param)
            elif "bias" in name:
                nn.init.constant_(param, 0)
            else:
                print(
                    "There is an uninitialized parameter. Check your parameters and try again."
                )
                exit()
        for name, param in self.final_convs.named_parameters():
            if "weight" in name:
                nn.init.xavier_normal_(param)
            elif "bias" in name:
                nn.init.constant_(param, 0)
            else:
                print(
                    "There is an uninitialized parameter. Check your parameters and try again."
                )
                exit()

        # Freeze SalGAN
        if freeze:
            for child in self.salgan.children():
                for param in child.parameters():
                    param.requires_grad = False
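The Gates convolution takes the concatenated input and hidden state (input_size + hidden_size channels) and emits 4 * hidden_size channels because a ConvLSTM computes all four gates in one pass and then splits them. The forward step is not part of this example, so the following is only a plausible sketch of how such a layer is consumed, not the model's actual code:

import torch

def convlstm_step(gates_conv, x, h, c):
    # gates_conv is a Conv2d like self.Gates above (padding preserves H, W)
    stacked = gates_conv(torch.cat([x, h], dim=1))   # (N, 4*hidden, H, W)
    i, f, o, g = stacked.chunk(4, dim=1)             # one slice per gate
    i, f, o = torch.sigmoid(i), torch.sigmoid(f), torch.sigmoid(o)
    g = torch.tanh(g)
    c = f * c + i * g                                # new cell state
    h = o * torch.tanh(c)                            # new hidden state
    return h, c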
Example #16
    def __init__(self):
        super(SalGAN, self).__init__()
        # Create encoder based on VGG16 architecture
        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # assemble the full encoder-decoder architecture
        self.salgan = torch.nn.Sequential(*(list(encoder.children()) +
                                            list(decoder.children())))
Example #17
    def __init__(self):
        super(Scanpath_based_Attention_module, self).__init__()
        Based_Attention_Module = based_AM
        soft_sam = SpatialSoftArgmax2d(normalized_coordinates=False)
        self.soft_sam = soft_sam
        self.encoder = torch.nn.Sequential(*Based_Attention_Module)
        self.attention_module = torch.nn.Sequential(*[
            Downsample(kernel_size=2),
            Conv2d(512, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Downsample(kernel_size=2),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
            Upsample(scale_factor=4, mode='nearest')
        ])

        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU()
            #Upsample(scale_factor=2, mode='nearest'),
        ]

        decoder_list_hm = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        self.decoder_hm = torch.nn.Sequential(*decoder_list_hm)

        self.decoder = torch.nn.Sequential(*decoder_list)
        self.aux = torch.nn.Sequential(*[
            #Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1,
                                                                         1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1,
                                                                         1)),
            ReLU(),
            Conv2d(256, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1,
                                                                         1)),
            ReLU(),
            Conv2d(100, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1,
                                                                         1)),
            ReLU()
        ])

        for name, param in self.aux.named_parameters():
            if "weight" in name:
                nn.init.xavier_normal_(param)
            elif "bias" in name:
                nn.init.constant_(param, 150.0)

        print("Model initialized, Sal_based_Attention_module")
    def __init__(self, use_gpu=True):
        super(Sal_global_Attention, self).__init__()

        self.use_gpu = use_gpu
        # Create encoder based on the VGG16 architecture used in SalGAN
        # Change just the 4th and 5th maxpooling layers to scale 4 instead of 2
        Global_Attention_Encoder = global_attention

        # select only the convolutional layers of the first 5 conv blocks; changing the maxpooling enlarges the receptive field
        # each neuron on the bottleneck sees (580,580), i.e. all viewports,
        # input (576,288), bottleneck feature map (9*4)*512, excluding the last maxpooling
        encoder = torch.nn.Sequential(*Global_Attention_Encoder)

        # define decoder based on VGG16 (inverse order and Upsampling layers, nearest mode)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # aggregate the full encoder-decoder architecture of Sal_global_Attention
        self.Sal_global_Attention = torch.nn.Sequential(
            *(list(encoder.children()) + list(decoder.children())))

        print("Model initialized, Sal_global_Attention")
        print("architecture len :", str(len(self.Sal_global_Attention)))
Example #19
    def __init__(self, alpha, ema_loc, residual, dropout, use_gpu=True):
        super(SalEMA, self).__init__()

        self.dropout = dropout
        self.residual = residual
        self.use_gpu = use_gpu
        if alpha is None:
            self.alpha = nn.Parameter(torch.Tensor([0.25]))
            print("Initial alpha set to: {}".format(self.alpha))
        else:
            self.alpha = torch.Tensor([alpha])
        assert 0 <= self.alpha <= 1
        self.ema_loc = ema_loc  # 30 = bottleneck

        # Create encoder based on VGG16 architecture
        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list=[
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # assemble the full encoder-decoder architecture
        self.salgan = torch.nn.Sequential(*(list(encoder.children())+list(decoder.children())))

        print("Model initialized, EMA located at {}".format(self.salgan[self.ema_loc]))
Example #20
def create_model(input_channels):
    # Create encoder based on VGG16 architecture
    # original_vgg16 = vgg16()
    #
    # # select only convolutional layers
    # encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # new encoder
    encoder = [
        Conv2d(input_channels,
               64,
               kernel_size=(3, 3),
               stride=(1, 1),
               padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        MaxPool2d(kernel_size=2,
                  stride=2,
                  padding=0,
                  dilation=1,
                  ceil_mode=False),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU()
    ]

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        Sigmoid(),
    ]
    encoder = torch.nn.Sequential(*encoder)
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    model = torch.nn.Sequential(*(list(encoder.children()) +
                                  list(decoder.children())))

    return model
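Since this encoder pools four times at stride 2 and the decoder upsamples four times by 2, any input whose sides are divisible by 16 comes back as a one-channel map at full resolution. A quick smoke test reusing the function above (input size chosen for illustration):

import torch

model = create_model(input_channels=3)
out = model(torch.randn(1, 3, 192, 256))
assert out.shape == (1, 1, 192, 256)   # saliency map at input resolution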
Example #21
    def __init__(self, alpha, ema_loc_1, ema_loc_2, use_gpu=True):
        super(SalGAN_EMA2, self).__init__()

        self.use_gpu = use_gpu
        self.alpha = alpha
        self.ema_loc_1 = ema_loc_1  # 30 = bottleneck
        self.ema_loc_2 = ema_loc_2  # 30 = bottleneck
        assert 0 <= self.alpha <= 1

        # Create encoder based on VGG16 architecture
        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list=[
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # assemble the full encoder-decoder architecture
        self.salgan = torch.nn.Sequential(*(list(encoder.children())+list(decoder.children())))

        print("Model initialized, EMAs located at {} and {}".format(self.salgan[self.ema_loc_1], self.salgan[self.ema_loc_2]))
Example #22
    def __init__(self):
        super(SalEMA, self).__init__()

        self.dropout = False
        self.residual = False
        self.use_gpu = True

        self.alpha = nn.Parameter(torch.Tensor([0.1]))
        self.ema_loc = 30  # 30 = bottleneck

        original_vgg16 = vgg16()

        # select only convolutional layers
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers)
        decoder_list = [
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1),
                   padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),
            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        self.salgan = torch.nn.Sequential(*(list(encoder.children()) +
                                            list(decoder.children())))

        print("Model initialized, SalEMA")
Example #23
def dcn_vgg(input_channels):

    model = nn.Sequential(
        Conv2d(input_channels, 64, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), padding=0),
        ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),

        Conv2d(64, 128, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), padding=0),
        ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),

        Conv2d(128, 256, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), padding=0),
        ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),

        Conv2d(256, 512, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), padding=0),
        ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),

        Conv2d(512, 512, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), padding=0),
        ReLU(),
    )

    return model
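Because every conv here uses padding=0, each 3x3 conv trims a pixel on every side in addition to the 2x poolings, so the output is noticeably smaller than input/16. A quick trace with an illustrative input size:

import torch

net = dcn_vgg(3)
out = net(torch.randn(1, 3, 224, 224))
# 224 -> 220 -> 110 -> 106 -> 53 -> 49 -> 24 -> 20 -> 10 -> 4
# (two or three unpadded convs, then a 2x pool, per block)
assert out.shape == (1, 512, 4, 4)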
Example #24
    def __init__(self, use_gpu=True):
        super(Salgan360, self).__init__()

        self.use_gpu = use_gpu
        # Create encoder based on the VGG16 architecture used in SalGAN
        original_vgg16 = vgg16()

        # select only the convolutional layers of the first 5 conv blocks; here we keep VGG's 212*212 receptive field
        # each neuron on the bottleneck sees just a (212,212) viewport while sliding,
        # input (576,288), bottleneck feature map 36*18*512, excluding the last maxpooling
        encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

        # define decoder based on VGG16 (inverse order and Upsampling layers, nearest mode)
        decoder_list=[
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Upsample(scale_factor=2, mode='nearest'),

            Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            ReLU(),
            Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
            Sigmoid(),
        ]

        decoder = torch.nn.Sequential(*decoder_list)

        # aggregate the full encoder-decoder architecture of Salgan360
        self.Salgan360 = torch.nn.Sequential(*(list(encoder.children())+list(decoder.children())))

        print("Model initialized, SalGAN360")
        print("architecture len :",str(len(self.Salgan360)))
Example #25
def create_model():
    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16(pretrained=True)

    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        Sigmoid(),
    ]

    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    model = torch.nn.Sequential(*(list(encoder.children()) +
                                  list(decoder.children())))

    return model