def __init__(self):
    super(SalGAN, self).__init__()
    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
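# A minimal smoke test for the assembled model -- my own sketch, not part of
# the original code. It assumes the SalGAN class above and the usual 192x256
# RGB input; the decoder's four 2x upsamplings undo the encoder's four
# max-poolings, so the saliency map comes back at input resolution.
import torch

model = SalGAN()
x = torch.randn(1, 3, 192, 256)   # one RGB image
with torch.no_grad():
    saliency = model.salgan(x)
print(saliency.shape)             # expected: torch.Size([1, 1, 192, 256])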
def __init__(self, requires_grad=False):
    super().__init__()
    vgg_pretrained_features = torchvision.models.vgg19(pretrained=True).features
    self.up5 = Upsample(scale_factor=16, mode='bicubic')
    self.up4 = Upsample(scale_factor=8, mode='bicubic')
    self.up3 = Upsample(scale_factor=4, mode='bicubic')
    self.up2 = Upsample(scale_factor=2, mode='bicubic')
    self.up1 = Upsample(scale_factor=1, mode='bicubic')
    self.weights = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0]
    self.slice1 = torch.nn.Sequential()
    self.slice2 = torch.nn.Sequential()
    self.slice3 = torch.nn.Sequential()
    self.slice4 = torch.nn.Sequential()
    self.slice5 = torch.nn.Sequential()
    for x in range(2):
        self.slice1.add_module(str(x), vgg_pretrained_features[x])
    for x in range(2, 7):
        self.slice2.add_module(str(x), vgg_pretrained_features[x])
    for x in range(7, 12):
        self.slice3.add_module(str(x), vgg_pretrained_features[x])
    for x in range(12, 21):
        self.slice4.add_module(str(x), vgg_pretrained_features[x])
    for x in range(21, 30):
        self.slice5.add_module(str(x), vgg_pretrained_features[x])
    if not requires_grad:
        for param in self.parameters():
            param.requires_grad = False
def __init__(self, use_gpu=True):
    super(Salgan360, self).__init__()
    self.use_gpu = use_gpu

    # Create encoder based on VGG16 architecture, as in the SalGAN architecture
    original_vgg16 = vgg16()
    # select only the convolutional layers of the first 5 conv blocks; this keeps
    # the same receptive field as VGG16 (212x212), so each bottleneck neuron sees
    # just a (212, 212) viewport while sliding.
    # input (576, 288); bottleneck feature size 36*18*512; the last max-pooling is excluded
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers, nearest mode)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # aggregate the full encoder-decoder architecture of SalGAN360
    self.Salgan360 = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, SalGAN360")
    print("architecture len :", str(len(self.Salgan360)))
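# A hedged sanity check for the receptive-field figures quoted in the comments
# above, using the standard recurrence r_out = r_in + (k - 1) * j, j_out = j * s.
# This is my own sketch; it reproduces 196 at conv5_3 (the last layer kept by
# features[:30]) and 212 once pool5 is included, which is the number the
# comment cites.
def receptive_field(layers):
    r, j = 1, 1
    for k, s in layers:          # (kernel_size, stride) per layer
        r += (k - 1) * j
        j *= s
    return r

# VGG16 conv blocks 1-5 (3x3 convs, 2x2 max-pools), as kept by features[:30]
vgg16_layers = (
    [(3, 1)] * 2 + [(2, 2)] +    # block 1 + pool
    [(3, 1)] * 2 + [(2, 2)] +    # block 2 + pool
    [(3, 1)] * 3 + [(2, 2)] +    # block 3 + pool
    [(3, 1)] * 3 + [(2, 2)] +    # block 4 + pool
    [(3, 1)] * 3                 # block 5, pool5 excluded
)
print(receptive_field(vgg16_layers))             # 196 at conv5_3
print(receptive_field(vgg16_layers + [(2, 2)]))  # 212 after pool5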
def create_model():
    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16(pretrained=True)
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    model = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    return model
def get_image(filmed_net, original_img):
    """Given a FiLMedNet and the original image array, create and save a new
    image that visualizes the prepooled activations.

    input:
        filmed_net: FiLMedNet object
        original_img: np.ndarray
    """
    # Save prepooled activations
    activations = filmed_net.cf_input
    activations = filmed_net.classifier[0](activations)
    activations = filmed_net.classifier[1](activations)
    activations = filmed_net.classifier[2](activations)

    # Create an average feature map scaled from 0 to 1
    f_map = (activations ** 2).mean(0).mean(0).sqrt()
    f_map = f_map - f_map.min().expand_as(f_map)
    f_map = f_map / f_map.max().expand_as(f_map)

    # Upsample the feature map to the size of the original image and add it
    # as an additional channel.
    f_map = (255 * f_map).round()
    upsample = Upsample(size=torch.Size(original_img.shape[:-1]), mode='bilinear')
    channel = upsample(f_map.unsqueeze(0).unsqueeze(0))
    channel = channel.squeeze().unsqueeze(-1).data.numpy()
    filtered_img = np.concatenate((original_img, channel), axis=2)

    # Save image
    filename = args.question.replace(' ', '_').strip(punctuation)
    imsave(filename + '.png', filtered_img)
def __init__(self, batch_norm=True):
    super().__init__()
    self.batch_norm = batch_norm
    self.c1 = PartialConv2d_3k(3, 64, 5, 2, padding=2, multi_channel=True, return_mask=True)
    if self.batch_norm:
        self.bn1 = BatchNorm2d(64)
    self.c2 = PartialConv2d_3k(64, 128, 3, 2, padding=1, multi_channel=True, return_mask=True)
    if self.batch_norm:
        self.bn2 = BatchNorm2d(128)
    self.c3 = PartialConv2d_3k(128, 128, 3, 1, padding=1, multi_channel=True, return_mask=True)
    if self.batch_norm:
        self.bn3 = BatchNorm2d(128)
    self.up4 = Upsample(scale_factor=2)
    self.c4 = PartialConv2d_3k(128 + 64, 64, 3, 1, padding=1, multi_channel=True, return_mask=True)
    self.up5 = Upsample(scale_factor=2)
    self.c5 = PartialConv2d_3k(64 + 3, 3, 3, 1, padding=1, multi_channel=True, return_mask=True)
def __init__(self):
    super(Decoder, self).__init__()
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=4, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=4, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    self.decoder = torch.nn.Sequential(*decoder_list)
    self._initialize_weights()
    print("decoder initialized")
    print("architecture len :", str(len(self.decoder)))
def acl_vgg(data, stateful):
    dcn = dcn_vgg()
    att_module = nn.Sequential(
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        # Max-pooling halves the spatial size but keeps the channel count,
        # so the dcn output still carries 512 channels at this point.
        Conv2d(512, 64, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), padding=0),
        ReLU(),
        MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
        Conv2d(128, 64, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), padding=0),
        ReLU(),
        Conv2d(128, 1, kernel_size=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest'))
    outs = TimeDistributed(dcn)(data)
    attention = TimeDistributed(att_module)(outs)
    # Keras Flatten/RepeatVector/Permute/Reshape chain (the original code
    # hard-coded a (32, 40, 512) reshape here): broadcast the single-channel
    # attention map across all 512 feature channels instead.
    f_attention = attention.expand(-1, -1, 512, -1, -1)
    m_outs = outs * f_attention  # elementwise multiplication
    outs = outs + m_outs
    ### This needs to change
    clstm = ConvLSTMCell(use_gpu=False, input_size=512, hidden_size=256, kernel_size=(3, 3))
    outs = clstm(outs)
    ###
    produce_smaps = nn.Sequential(
        # InputDimensions will be figured out after changing the ConvLSTM
        Conv2d(InputDimensions, 1, kernel_size=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest'))
    outs = TimeDistributed(produce_smaps)(outs)
    attention = TimeDistributed(Upsample(scale_factor=2, mode='nearest'))(attention)
    return [outs, outs, outs, attention, attention, attention]
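# PyTorch has no built-in TimeDistributed; the function above assumes one.
# A minimal sketch of the Keras-style wrapper this port appears to follow
# (my own assumption, not part of the original code): fold the time axis into
# the batch axis, apply the module, and unfold.
import torch
import torch.nn as nn

class TimeDistributed(nn.Module):
    """Apply `module` to every time step of a (batch, time, ...) tensor."""
    def __init__(self, module):
        super().__init__()
        self.module = module

    def forward(self, x):
        b, t = x.shape[:2]
        y = self.module(x.reshape(b * t, *x.shape[2:]))  # merge batch and time
        return y.reshape(b, t, *y.shape[1:])             # split them back out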
def __init__(self, batch_norm=True):
    super().__init__()
    self.batch_norm = batch_norm
    self.c1 = Conv2d(3, 64, 5, 2, padding=2)
    if self.batch_norm:
        self.bn1 = BatchNorm2d(64)
    self.c2 = Conv2d(64, 128, 3, 2, padding=1)
    if self.batch_norm:
        self.bn2 = BatchNorm2d(128)
    self.c3 = Conv2d(128, 128, 3, 1, padding=1)
    if self.batch_norm:
        self.bn3 = BatchNorm2d(128)
    self.up4 = Upsample(scale_factor=2)
    self.c4 = Conv2d(128 + 64, 64, 3, 1, padding=1)
    self.up5 = Upsample(scale_factor=2)
    self.c5 = Conv2d(64 + 3, 3, 3, 1, padding=1)
def __init__(self, class_num: int = 80):
    super().__init__()  # required so the submodules below register on the nn.Module
    self.class_num = class_num
    self.upsample = Upsample(None, 2, 'nearest')
    self.cat = Concat(1)
    self.initBlock1()
    self.initBlock2()
    self.initBlock3()
    self.initBlock4()
    self.initBlock5()
    self.initDetect()
def generate_patches(src_path, files, set_path, crop_size, img_format, upsampling):
    img_path = os.path.join(src_path, files)
    img = Image.open(img_path).convert('RGB')
    if upsampling > 0:
        img = ToTensor()(img).unsqueeze_(0)
        m = Upsample(scale_factor=abs(upsampling), mode='nearest')
        img = m(img)
        img = tensor2img(img)
    name, _ = files.split('.')
    filedir = os.path.join(set_path, 'a')
    filedirb = os.path.join(set_path, 'b')
    if not dir_exists(filedir):
        mkdir(filedir)
        mkdir(filedirb)
    img = np.array(img)
    h, w = img.shape[0], img.shape[1]
    if crop_size is None:
        img = np.copy(img)
        img_patches = np.expand_dims(img, 0)
    else:
        rem_h = (h % crop_size[0])
        rem_w = (w % crop_size[1])
        img = img[:h - rem_h, :w - rem_w]
        img_patches = crop(img, crop_size)
        # print('Cropped')
    for i in range(min(len(img_patches), 3)):
        img = Image.fromarray(img_patches[i])
        # print(np.asarray(compress(torch.Tensor(img_patches[0]), 4) * (2**4 - 1)))
        imgs = tensor2img(compress(ToTensor()(img_patches[i]), 3))
        # print('Compressed')
        img.save(os.path.join(filedir, '{}_{}.{}'.format(name, i, img_format)))
        # print('OK')
        imgs.save(os.path.join(filedirb, '{}_{}.{}'.format(name, i, img_format)))
def forward(self, seg_map, dense_map, target, seg_mask, infer=False):
    seg_map = seg_map.float().cuda()
    dense_map = dense_map.float().cuda()
    target = target.float().cuda()

    feat_map_total = []
    for each_class in range(self.opt.label_nc):
        inp_enc = seg_map[:, each_class:each_class + 1, :, :]
        feat_map_each_class = self.netE.forward(inp_enc)  # bs, 10, H, W
        feat_map_total.append(feat_map_each_class)
    feat_map_total = torch.cat([i for i in feat_map_total], dim=1)

    # local pooling step and upscaling
    local_avg_pool_fn = nn.AvgPool2d((64, 64))
    feat_map_each_class_pooled = local_avg_pool_fn(feat_map_total)
    upscale_fn = Upsample(scale_factor=64, mode='nearest')
    feat_map_final = upscale_fn(feat_map_each_class_pooled)

    # GAN input
    input_concat = torch.cat((dense_map, feat_map_final), dim=1).cuda()
    fake_image = self.netG.forward(input_concat)

    # Fake detection and loss
    pred_fake_pool = self.discriminate(seg_map, fake_image, use_pool=True)
    loss_D_fake = self.criterionGAN(pred_fake_pool, False)

    # Real detection and loss
    pred_real = self.discriminate(seg_map, target)
    loss_D_real = self.criterionGAN(pred_real, True)

    # GAN loss (fake passability loss)
    pred_fake = self.netD.forward(torch.cat((seg_map, fake_image), dim=1))
    loss_G_GAN = self.criterionGAN(pred_fake, True)

    # Cross-entropy loss
    loss_G_CE = self.criterionCE(fake_image, seg_mask)

    return [
        self.loss_filter(loss_G_GAN, loss_G_CE, loss_D_real, loss_D_fake),
        None if not infer else fake_image
    ]
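# A quick shape check of the pooling-then-upscaling pair above: AvgPool2d with
# a 64x64 window (stride defaults to the window size) followed by a 64x nearest
# upsample restores the original spatial size exactly when H and W are
# multiples of 64. The dimensions below are my example, not from the training code.
import torch
import torch.nn as nn
from torch.nn.modules.upsampling import Upsample

feat = torch.randn(1, 10, 256, 192)            # example feature map, H and W divisible by 64
pooled = nn.AvgPool2d((64, 64))(feat)          # -> (1, 10, 4, 3): one average per 64x64 block
restored = Upsample(scale_factor=64, mode='nearest')(pooled)
print(restored.shape)                          # torch.Size([1, 10, 256, 192])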
def __init__(self, filters, in_channels, block_i=0, up_conv_z2=False):
    super().__init__()
    print(f'Creating UpBlock with {filters} filters')
    self.conv_part = Sequential()
    conv0 = Conv3d(in_channels=in_channels, out_channels=filters,
                   kernel_size=(3, 3, 3), padding=(1, 1, 1))
    self.conv_part.add_module(f'u{block_i}-0_conv3d', conv0)
    self.conv_part.add_module(f'u{block_i}-0_relu', ReLU())
    uz = int(up_conv_z2)
    conv1 = Conv3d(in_channels=filters, out_channels=filters,
                   kernel_size=(3, 3, 1 + 2 * uz), padding=(1, 1, uz))
    self.conv_part.add_module(f'u{block_i}-1_conv3d', conv1)
    self.conv_part.add_module(f'u{block_i}-1_relu', ReLU())
    self.up_sample = Upsample(scale_factor=(2, 2, 1))
def inference_forward_shape(self, query, ref, dense_map):
    query = query.float().cuda()
    dense_map = dense_map.float().cuda()
    ref = ref.float().cuda()

    query_ref_mixed = torch.cat(
        (query[:, 0:5, :, :], ref[:, 5:8, :, :], query[:, 8:, :, :]), axis=1)
    # query_ref_mixed = torch.cat(
    #     (query[:, 0:9, :, :], ref[:, 5:8, :, :], query[:, 8:9, :, :], ref[:, 9:10, :, :],
    #      query[:, 10:12, :, :], ref[:, 12:13, :, :],
    #      query[:, 13:16, :, :], ref[:, 16:20, :, :]), axis=1)
    # query_ref_mixed = torch.cat(
    #     (query[:, 0:9, :, :], ref[:, 9:10, :, :], query[:, 10:12, :, :],
    #      ref[:, 12:13, :, :], query[:, 13:16, :, :], ref[:, 16:20, :, :]), axis=1)

    feat_map_total = []
    for each_class in range(self.opt.label_nc):
        # bs, 1, H, W
        inp_enc = query_ref_mixed[:, each_class:each_class + 1, :, :]
        with torch.no_grad():
            feat_map_each_class = self.netE.forward(inp_enc)  # bs, 10, H, W
        feat_map_total.append(feat_map_each_class)
    feat_map_total = torch.cat([i for i in feat_map_total], dim=1)

    # local pooling step
    local_avg_pool_fn = nn.AvgPool2d((64, 64))
    feat_map_each_class_pooled = local_avg_pool_fn(feat_map_total)

    # upscaling
    upscale_fn = Upsample(scale_factor=64, mode='nearest')
    feat_map_final = upscale_fn(feat_map_each_class_pooled)

    input_concat = torch.cat((dense_map, feat_map_final), dim=1)
    with torch.no_grad():
        fake_image = self.netG.forward(input_concat)
    return query_ref_mixed, fake_image
def inference_enc(self, query, dense_map, ref, cloth_part='uppercloth'):
    query = query.float().cuda()
    dense_map = dense_map.float().cuda()
    ref = ref.float().cuda()

    # Cloth part to mix
    if cloth_part == 'uppercloth':
        query_ref_mixed = torch.cat(
            (query[:, 0:5, :, :], ref[:, 5:8, :, :], query[:, 8:, :, :]), axis=1)
    elif cloth_part == 'bottomcloth':
        query_ref_mixed = torch.cat(
            (query[:, 0:9, :, :], ref[:, 9:10, :, :], query[:, 10:12, :, :],
             ref[:, 12:13, :, :], query[:, 13:16, :, :], ref[:, 16:20, :, :]), axis=1)

    # Encoder
    feat_map_total = []
    for each_class in range(self.opt.label_nc):
        inp_enc = query_ref_mixed[:, each_class:each_class + 1, :, :]
        with torch.no_grad():
            feat_map_each_class = self.netE.forward(inp_enc)  # bs, 10, H, W
        feat_map_total.append(feat_map_each_class)
    feat_map_total = torch.cat([i for i in feat_map_total], dim=1)

    # Local pooling step and upscaling
    local_avg_pool_fn = nn.AvgPool2d((64, 64))
    feat_map_each_class_pooled = local_avg_pool_fn(feat_map_total)
    upscale_fn = Upsample(scale_factor=64, mode='nearest')
    feat_map_final = upscale_fn(feat_map_each_class_pooled)

    # GAN
    input_concat = torch.cat((dense_map, feat_map_final), dim=1)
    with torch.no_grad():
        fake_image = self.netG.forward(input_concat)
    return query_ref_mixed, fake_image
def create_model(input_channels):
    # Create encoder based on VGG16 architecture
    # original_vgg16 = vgg16()
    #
    # # select only convolutional layers
    # encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # new encoder
    encoder = [
        Conv2d(input_channels, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
        Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
        Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU()
    ]

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(512),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(256),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(128),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        BatchNorm2d(64),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=(0, 0)),
        Sigmoid(),
    ]
    encoder = torch.nn.Sequential(*encoder)
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    model = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    return model
def __init__(self, alpha, ema_loc, residual, dropout, use_gpu=True):
    super(SalEMA, self).__init__()
    self.dropout = dropout
    self.residual = residual
    self.use_gpu = use_gpu
    if alpha is None:
        self.alpha = nn.Parameter(torch.Tensor([0.25]))
        print("Initial alpha set to: {}".format(self.alpha))
    else:
        self.alpha = torch.Tensor([alpha])
        assert (self.alpha <= 1 and self.alpha >= 0)
    self.ema_loc = ema_loc  # 30 = bottleneck

    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, EMA located at {}".format(self.salgan[self.ema_loc]))
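# The EMA layer applied at self.ema_loc is not shown in this snippet; the
# update it performs over consecutive frames is the usual exponential moving
# average. This is a sketch under that assumption, not a verbatim excerpt.
def ema_update(alpha, prev_state, current):
    # alpha close to 0 -> heavy temporal smoothing; alpha = 1 -> no memory
    return alpha * current + (1 - alpha) * prev_state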
def __init__(self, alpha, ema_loc_1, ema_loc_2, use_gpu=True):
    super(SalGAN_EMA2, self).__init__()
    self.use_gpu = use_gpu
    self.alpha = alpha
    self.ema_loc_1 = ema_loc_1  # 30 = bottleneck
    self.ema_loc_2 = ema_loc_2  # 30 = bottleneck
    assert (self.alpha <= 1 and self.alpha >= 0)

    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, EMAs located at {} and {}".format(
        self.salgan[self.ema_loc_1], self.salgan[self.ema_loc_2]))
def __init__(self, cin, cout, k, stride=1, padding=0):
    super(UpscaleConv, self).__init__()
    self.upsample1 = Upsample(scale_factor=2, mode="nearest")
    self.conv2 = nn.Conv2d(cin, cout, k, stride=stride, padding=padding)
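# Pairing nearest-neighbour upsampling with a stride-1 convolution is a common
# substitute for ConvTranspose2d that avoids checkerboard artifacts. A usage
# sketch (my example shapes; it assumes the UpscaleConv class above, whose
# forward presumably chains the two members, so they are called directly here).
import torch

up = UpscaleConv(cin=64, cout=32, k=3, padding=1)
x = torch.randn(1, 64, 16, 16)
y = up.conv2(up.upsample1(x))   # upsample 2x, then convolve
print(y.shape)                  # torch.Size([1, 32, 32, 32])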
def __init__(self):
    super(SalEMA, self).__init__()
    self.dropout = False
    self.residual = False
    self.use_gpu = True
    self.alpha = nn.Parameter(torch.Tensor([0.1]))
    self.ema_loc = 30  # 30 = bottleneck

    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, SalEMA")
            self.slice5.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, X):
        h_relu1 = self.slice1(X)
        h_relu2 = self.slice2(h_relu1)
        h_relu3 = self.slice3(h_relu2)
        h_relu4 = self.slice4(h_relu3)
        h_relu5 = self.slice5(h_relu4)
        out = [h_relu1, h_relu2, h_relu3, h_relu4, h_relu5]
        return out


up5 = Upsample(scale_factor=16, mode='bicubic')
up4 = Upsample(scale_factor=8, mode='bicubic')
up3 = Upsample(scale_factor=4, mode='bicubic')
up2 = Upsample(scale_factor=2, mode='bicubic')
up1 = Upsample(scale_factor=1, mode='bicubic')
to_pil = ToPILImage()
to_tensor = ToTensor()


def one_hot_encoding(semantic, num_classes=20):
    one_hot = torch.zeros(num_classes, semantic.size(1), semantic.size(2))
    for class_id in range(num_classes):
        one_hot[class_id, :, :] = (semantic.squeeze(0) == class_id)
    one_hot = one_hot[:num_classes - 1, :, :]
    return one_hot
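# A small usage sketch for one_hot_encoding (my example input): it builds one
# binary plane per class and then drops the last one, so with num_classes=20
# the result has 19 channels.
import torch

semantic = torch.randint(0, 20, (1, 4, 4))   # a fake 4x4 label map with classes 0..19
one_hot = one_hot_encoding(semantic)          # -> shape (19, 4, 4); class 19 is dropped
print(one_hot.shape)                          # torch.Size([19, 4, 4])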
def main(args):
    # device
    device = torch.device(args.device)

    # tensorboard
    logger_tb = logger.Logger(log_dir=args.experiment_name)

    # load img
    img = plt.imread(args.input_img)

    # normalize each channel to [0, 1]
    def norm(x):
        return (x - x.min(axis=(0, 1))) / (x.max(axis=(0, 1)) - x.min(axis=(0, 1)))

    img = norm(img)
    img = np.transpose(img, (2, 0, 1))

    # load pretrained model
    vgg19 = models.vgg19(pretrained=True).features.eval()
    model = utils.build_model(vgg19, optim_layer=args.layer, device=device)
    model = model.to(device)

    # loss function
    loss_fn = utils.L2Loss()

    # Populate oct_imgs with different sized zooms of the original image
    oct_imgs = [img]
    for oct_itr in range(args.num_octave):
        zoom_img = zoom(oct_imgs[-1], (1, 1 / args.octave_ratio, 1 / args.octave_ratio))
        oct_imgs.append(zoom_img)
    oct_imgs = [utils.process_tensor(oct_img, device) for oct_img in oct_imgs]
    ori_oct_imgs = [oct_img.clone() for oct_img in oct_imgs]

    while len(oct_imgs) > 0:
        oct_img = oct_imgs.pop()
        ori_oct_img = ori_oct_imgs.pop()
        idx = len(oct_imgs)
        print(f"Deep dreaming on octave: {idx}")
        for epoch in range(args.epoch):
            model.zero_grad()
            output = model.forward(oct_img)
            loss = loss_fn(output)
            loss.backward()
            grad = oct_img.grad.cpu().numpy()
            lr = args.lr / np.abs(grad).mean()

            # apply gaussian smoothing on gradient
            sigma = (epoch * 4.0) / args.epoch + 0.5
            grad_smooth1 = gaussian_filter(grad, sigma=sigma)
            grad_smooth2 = gaussian_filter(grad, sigma=sigma * 2)
            grad_smooth3 = gaussian_filter(grad, sigma=sigma * 0.5)
            grad = (grad_smooth1 + grad_smooth2 + grad_smooth3)
            grad = torch.Tensor(grad).to(device)

            # gradient ascent step on the octave image
            oct_img.data += lr * grad.data
            oct_img.data.clamp_(0, 1)
            oct_img.grad.data.zero_()

            # display image on tensorboard
            dream_img = oct_img.squeeze().cpu().detach().numpy().copy()
            logger_tb.update_loss('loss ', loss.item(), epoch)
            logger_tb.update_image(f'transformation oct{idx}', dream_img, epoch)

        if len(oct_imgs) == 0:
            break
        # add the "dreamed" portion of the current octave to the next octave
        h = oct_imgs[-1].shape[2]
        w = oct_imgs[-1].shape[3]
        difference = oct_img.data - ori_oct_img.data
        difference = Upsample(size=(h, w), mode='nearest')(difference)
        oct_imgs[-1].data += difference
def __init__(self, seed_init, freeze=True, use_gpu=True):
    super(SalCLSTM56, self).__init__()
    self.use_gpu = use_gpu

    # Create encoder based on VGG16 architecture
    original_vgg16 = vgg16()
    # select only convolutional layers
    encoder = torch.nn.Sequential(*list(original_vgg16.features)[:30])

    # define decoder based on VGG16 (inverse order and Upsampling layers)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        # During the Upsampling operation we may end up losing 1 dimension
        # if it was an odd number before
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # assemble the full encoder-decoder architecture
    self.salgan = torch.nn.Sequential(*(list(encoder.children()) + list(decoder.children())))
    # print(self.salgan)

    # ConvLSTM
    self.input_size = 128
    self.hidden_size = 128
    self.Gates = nn.Conv2d(
        in_channels=self.input_size + self.hidden_size,
        out_channels=4 * self.hidden_size,
        kernel_size=(3, 3),
        padding=1)  # padding 1 to preserve HxW dimensions

    final_convolutions = [
        Conv2d(self.hidden_size, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    self.final_convs = torch.nn.Sequential(*final_convolutions)

    # Initialize weights of ConvLSTM
    torch.manual_seed(seed_init)
    for name, param in self.Gates.named_parameters():
        if "weight" in name:
            nn.init.xavier_normal_(param)
        elif "bias" in name:
            nn.init.constant_(param, 0)
        else:
            print("There is some uninitialized parameter. Check your parameters and try again.")
            exit()
    for name, param in self.final_convs.named_parameters():
        if "weight" in name:
            nn.init.xavier_normal_(param)
        elif "bias" in name:
            nn.init.constant_(param, 0)
        else:
            print("There is some uninitialized parameter. Check your parameters and try again.")
            exit()

    # Freeze SalGAN
    if freeze:
        for child in self.salgan.children():
            for param in child.parameters():
                param.requires_grad = False
import torch.nn as nn
from torch.nn.modules.upsampling import Upsample

from tlkit.utils import load_state_dict_from_path
from .superposition import HashConv2d, ProjectedConv2d
from .basic_models import zero_fn, ScaleLayer

upsampler = Upsample(scale_factor=2, mode='nearest')


def load_submodule(model_class, model_weights_path, model_kwargs, backup_fn=zero_fn):
    # If there is a model, use it! If there is initialization, use it!
    # If neither, use backup_fn.
    if model_class is not None:
        model = model_class(**model_kwargs)
        if model_weights_path is not None:
            model, _ = load_state_dict_from_path(model, model_weights_path)
    else:
        model = backup_fn
        assert model_weights_path is None, 'cannot have weights without model'
    return model


def _make_layer(in_channels, out_channels, num_groups=2, kernel_size=3, stride=1, padding=0,
def __init__(self):
    super(Scanpath_based_Attention_module, self).__init__()
    Based_Attention_Module = based_AM
    soft_sam = SpatialSoftArgmax2d(normalized_coordinates=False)
    self.soft_sam = soft_sam
    self.encoder = torch.nn.Sequential(*Based_Attention_Module)
    self.attention_module = torch.nn.Sequential(*[
        Downsample(kernel_size=2),
        Conv2d(512, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Downsample(kernel_size=2),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest')
    ])
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU()
        # Upsample(scale_factor=2, mode='nearest'),
    ]
    decoder_list_hm = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    self.decoder_hm = torch.nn.Sequential(*decoder_list_hm)
    self.decoder = torch.nn.Sequential(*decoder_list)
    self.aux = torch.nn.Sequential(*[
        # Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(100, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU()
    ])
    for name, param in self.aux.named_parameters():
        if "weight" in name:
            nn.init.xavier_normal_(param)
        elif "bias" in name:
            nn.init.constant_(param, 150.0)
    print("Model initialized, Scanpath_based_Attention_module")
def __init__(self, use_gpu=True):
    super(Sal_global_Attention, self).__init__()
    self.use_gpu = use_gpu

    # Create encoder based on VGG16 architecture as used in the SalGAN architecture;
    # change just the 4th and 5th max-pooling layers to scale 4 instead of 2
    Global_Attention_Encoder = global_attention
    # select only the convolutional layers of the first 5 conv blocks; the changed
    # max-pooling enlarges the receptive field, so each bottleneck neuron sees
    # (580, 580): all viewports.
    # input (576, 288); bottleneck feature count (9*4)*512; the last max-pooling is excluded
    encoder = torch.nn.Sequential(*Global_Attention_Encoder)

    # define decoder based on VGG16 (inverse order and Upsampling layers, nearest mode)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    decoder = torch.nn.Sequential(*decoder_list)

    # aggregate the full encoder-decoder architecture of Sal_global_Attention
    self.Sal_global_Attention = torch.nn.Sequential(
        *(list(encoder.children()) + list(decoder.children())))
    print("Model initialized, Sal_global_Attention")
    print("architecture len :", str(len(self.Sal_global_Attention)))
def __init__(self, in_nc=3, out_nc=3, N=8, S=8, upscale=4):
    super(ORDSRModel, self).__init__()
    self.upscale = upscale
    self.N = N
    self.S = S
    self.upsampling = Upsample(scale_factor=self.upscale, mode='bicubic')

    # ================================ Extract Shallow DCT Features ================================ #
    self.DCTTrans = DCTConv(stride=self.S, padding=0, blocksize=self.N)

    # ================================ Learn the High DCT Features ================================ #
    self.conv0 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=1, padding=2, bias=True)
    self.relu0 = nn.LeakyReLU(0.2, inplace=True)
    self.conv1 = nn.Conv2d(in_channels=60, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu1 = nn.LeakyReLU(0.2, inplace=True)
    self.conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu2 = nn.LeakyReLU(0.2, inplace=True)
    self.conv3 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu3 = nn.LeakyReLU(0.2, inplace=True)
    self.conv4 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu4 = nn.LeakyReLU(0.2, inplace=True)
    self.conv5 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu5 = nn.LeakyReLU(0.2, inplace=True)
    self.conv6 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu6 = nn.LeakyReLU(0.2, inplace=True)
    self.conv7 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu7 = nn.LeakyReLU(0.2, inplace=True)
    self.conv8 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu8 = nn.LeakyReLU(0.2, inplace=True)
    self.conv9 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu9 = nn.LeakyReLU(0.2, inplace=True)
    self.conv10 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu10 = nn.LeakyReLU(0.2, inplace=True)
    self.conv11 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu11 = nn.LeakyReLU(0.2, inplace=True)
    self.conv12 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu12 = nn.LeakyReLU(0.2, inplace=True)
    self.conv13 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu13 = nn.LeakyReLU(0.2, inplace=True)
    self.conv14 = nn.Conv2d(in_channels=64, out_channels=60, kernel_size=3, stride=1, padding=1, bias=True)
    self.relu14 = nn.LeakyReLU(0.2, inplace=True)
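# This model follows the pre-upsampling super-resolution style: the low-res
# input is first enlarged bicubically (the self.upsampling module above) and
# then refined at target resolution. A hedged shape sketch with my own example
# patch size:
import torch
from torch.nn.modules.upsampling import Upsample

lr = torch.randn(1, 1, 48, 48)                 # example low-res luminance patch
up = Upsample(scale_factor=4, mode='bicubic')  # same configuration as self.upsampling
print(up(lr).shape)                            # torch.Size([1, 1, 192, 192])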
def __init__(self):
    super(Sal_based_Attention_module, self).__init__()

    # Create encoder based on VGG16 architecture as used in the SalGAN
    # architecture, with the aforementioned changes applied
    Based_Attention_Module = based_AM
    # select only the first 5 conv blocks; this keeps the same receptive field as VGG16 (212x212)
    # each bottleneck neuron sees just a (244, 244) viewport while sliding
    # input (640, 320); bottleneck feature size 40*20*512; the last max-pooling of SalGAN is excluded
    # feature count on the attention-module bottleneck: 10*5*128
    # the attention module's receptive field is enlarged to (676, 676)
    self.encoder = torch.nn.Sequential(*Based_Attention_Module)
    self.attention_module = torch.nn.Sequential(*[
        Downsample(kernel_size=2),
        Conv2d(512, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Downsample(kernel_size=2),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
        Upsample(scale_factor=4, mode='nearest')
    ])
    # self.reshape = Reshape(-1, 512, 40, 20)

    # define decoder based on VGG16 (inverse order and Upsampling layers, nearest mode)
    decoder_list = [
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(512, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(256, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Upsample(scale_factor=2, mode='nearest'),
        Conv2d(128, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
        ReLU(),
        Conv2d(64, 1, kernel_size=(1, 1), stride=(1, 1), padding=0),
        Sigmoid(),
    ]
    self.decoder = torch.nn.Sequential(*decoder_list)
    print("Model initialized, Sal_based_Attention_module")
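# In the attention module above, two 2x downsamplings followed by the final
# 4x nearest upsample leave the mask at the encoder's spatial resolution, so
# it can gate the 512-channel features elementwise. A sketch using MaxPool2d
# as a stand-in for the custom Downsample (my substitution, example shapes):
import torch
import torch.nn as nn

feats = torch.randn(1, 512, 20, 40)            # example bottleneck features
down = nn.MaxPool2d(2)                         # stand-in for Downsample(kernel_size=2)
small = down(down(torch.randn(1, 1, 20, 40)))  # two 2x reductions -> (1, 1, 5, 10)
mask = nn.Upsample(scale_factor=4, mode='nearest')(torch.sigmoid(small))
print((feats * mask).shape)                    # torch.Size([1, 512, 20, 40]); mask broadcasts over channels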
def mae_features(config_file_path, gpu_ids, dataroot, data_origin):
    soft_fdr = os.path.join(dataroot, 'mae_features_' + data_origin)

    if not os.path.exists(soft_fdr):
        os.makedirs(soft_fdr)

    # load experiment setting
    with open(config_file_path, 'r') as stream:
        config = yaml.load(stream, Loader=yaml.FullLoader)

    # activate GPUs
    config['gpu_ids'] = gpu_ids
    gpu = int(gpu_ids)

    # get data loaders
    cfg_test_loader = config['test_dataloader']
    cfg_test_loader['dataset_args']['dataroot'] = dataroot
    test_loader = trainer_util.get_dataloader(cfg_test_loader['dataset_args'],
                                              cfg_test_loader['dataloader_args'])

    class VGG19(torch.nn.Module):
        def __init__(self, requires_grad=False):
            super().__init__()
            vgg_pretrained_features = torchvision.models.vgg19(pretrained=True).features
            self.slice1 = torch.nn.Sequential()
            self.slice2 = torch.nn.Sequential()
            self.slice3 = torch.nn.Sequential()
            self.slice4 = torch.nn.Sequential()
            self.slice5 = torch.nn.Sequential()
            for x in range(2):
                self.slice1.add_module(str(x), vgg_pretrained_features[x])
            for x in range(2, 7):
                self.slice2.add_module(str(x), vgg_pretrained_features[x])
            for x in range(7, 12):
                self.slice3.add_module(str(x), vgg_pretrained_features[x])
            for x in range(12, 21):
                self.slice4.add_module(str(x), vgg_pretrained_features[x])
            for x in range(21, 30):
                self.slice5.add_module(str(x), vgg_pretrained_features[x])
            if not requires_grad:
                for param in self.parameters():
                    param.requires_grad = False

        def forward(self, X):
            h_relu1 = self.slice1(X)
            h_relu2 = self.slice2(h_relu1)
            h_relu3 = self.slice3(h_relu2)
            h_relu4 = self.slice4(h_relu3)
            h_relu5 = self.slice5(h_relu4)
            out = [h_relu1, h_relu2, h_relu3, h_relu4, h_relu5]
            return out

    from torch.nn.modules.upsampling import Upsample
    up5 = Upsample(scale_factor=16, mode='bicubic')
    up4 = Upsample(scale_factor=8, mode='bicubic')
    up3 = Upsample(scale_factor=4, mode='bicubic')
    up2 = Upsample(scale_factor=2, mode='bicubic')
    up1 = Upsample(scale_factor=1, mode='bicubic')
    to_pil = ToPILImage()

    # Going through the visualization loader
    weights = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0]
    vgg = VGG19().cuda(gpu)

    with torch.no_grad():
        for i, data_i in enumerate(test_loader):
            print('Generating image %i out of %i' % (i + 1, len(test_loader)))
            img_name = os.path.basename(data_i['original_path'][0])
            original = data_i['original'].cuda(gpu)
            synthesis = data_i['synthesis'].cuda(gpu)
            x_vgg, y_vgg = vgg(original), vgg(synthesis)
            feat5 = torch.mean(torch.abs(x_vgg[4] - y_vgg[4]), dim=1).unsqueeze(1)
            feat4 = torch.mean(torch.abs(x_vgg[3] - y_vgg[3]), dim=1).unsqueeze(1)
            feat3 = torch.mean(torch.abs(x_vgg[2] - y_vgg[2]), dim=1).unsqueeze(1)
            feat2 = torch.mean(torch.abs(x_vgg[1] - y_vgg[1]), dim=1).unsqueeze(1)
            feat1 = torch.mean(torch.abs(x_vgg[0] - y_vgg[0]), dim=1).unsqueeze(1)
            img_5 = up5(feat5)
            img_4 = up4(feat4)
            img_3 = up3(feat3)
            img_2 = up2(feat2)
            img_1 = up1(feat1)
            combined = (weights[0] * img_1 + weights[1] * img_2 + weights[2] * img_3
                        + weights[3] * img_4 + weights[4] * img_5)
            min_v = torch.min(combined.squeeze())
            max_v = torch.max(combined.squeeze())
            combined = (combined.squeeze() - min_v) / (max_v - min_v)
            combined = to_pil(combined.cpu())
            pred_name = 'mae_' + img_name
            combined.save(os.path.join(soft_fdr, pred_name))
import matplotlib.pylab as plt
import seaborn as sns
from tqdm import trange
import torch
import torchvision
from torch.nn.modules.upsampling import Upsample

from lucent.optvis import render  # , param, transform, objectives
from rosettastone.utils import show_grid

# TODO: as parameter
SIZE = 227  # 224
UPSAMPLE = Upsample(size=(SIZE, SIZE), mode='bilinear', align_corners=False)


def crop_by_max_activation(raw_acts, img, quantile_threshold=0.1, shape=(SIZE, SIZE)):
    # upsample(raw_acts.cpu().numpy(), shape)
    upsampled_acts = UPSAMPLE(raw_acts[None, None, :, :]).cpu().numpy()
    mask_threshold = np.quantile(upsampled_acts, 1 - quantile_threshold)
    mask = upsampled_acts > mask_threshold
    if not mask.any():
        return None
    _, _, rows, cols = np.nonzero(mask)