def __init__(self, encoder, decoder):
    """Build encoder and decoder from their registry configs.

    If the built encoder exposes ``out_channels``, that value is written
    into the decoder config as ``in_channels`` before the decoder is built.
    """
    super().__init__()
    enc = build_component(encoder)
    self.encoder = enc
    # Propagate the encoder's output width into the decoder config, but
    # only when the encoder actually declares one.
    if hasattr(enc, 'out_channels'):
        decoder['in_channels'] = enc.out_channels
    self.decoder = build_component(decoder)
def __init__(self, encoder, decoder):
    """Construct the partial-conv encoder-decoder from component configs."""
    super(PConvEncoderDecoder, self).__init__()
    # Build both halves of the network from their registry configs.
    encoder_module = build_component(encoder)
    decoder_module = build_component(decoder)
    self.encoder = encoder_module
    self.decoder = decoder_module
    # support fp16
    self.fp16_enabled = False
def __init__(self,
             encoder,
             decoder1,
             neck,
             decoder2,
             train_encoder=True,
             train_decoder1=True,
             train_decoder2=True):
    """Build a shared-encoder network with a neck and two decoders.

    Args:
        encoder (dict): Config for the shared encoder component.
        decoder1 (dict): Config for the first decoder; its ``in_channels``
            entry is overwritten with the encoder's ``out_channels``.
        neck (dict): Config for the neck component.
        decoder2 (dict): Config for the second decoder; its ``in_channels``
            entry is overwritten with the encoder's ``out_channels``.
        train_encoder (bool): If False, freeze the encoder (eval mode,
            gradients disabled). Default: True.
        train_decoder1 (bool): If False, freeze the first decoder.
            Default: True.
        train_decoder2 (bool): If False, freeze the second decoder.
            Default: True.
    """
    super(SimpleShareEncoderDecoder, self).__init__()

    def _freeze(module):
        # Put the module in eval mode and stop gradient updates for all
        # of its parameters.
        module.eval()
        for param in module.parameters():
            param.requires_grad = False

    self.encoder = build_component(encoder)
    self.neck = build_component(neck)
    # Both decoders consume the shared encoder's feature channels.
    decoder1['in_channels'] = self.encoder.out_channels
    decoder2['in_channels'] = self.encoder.out_channels
    # NOTE(review): the first decoder is stored as ``self.decoder`` (not
    # ``self.decoder1``) — kept as-is for backward compatibility.
    self.decoder = build_component(decoder1)
    self.decoder2 = build_component(decoder2)
    self.train_encoder = train_encoder
    self.train_decoder1 = train_decoder1
    self.train_decoder2 = train_decoder2
    # The freeze logic was previously triplicated inline; factored into a
    # single helper so the three branches cannot drift apart.
    if not train_encoder:
        _freeze(self.encoder)
    if not train_decoder1:
        _freeze(self.decoder)
    if not train_decoder2:
        _freeze(self.decoder2)
def __init__(self,
             encoder=dict(type='AOTEncoder'),
             decoder=dict(type='AOTDecoder'),
             dilation_neck=dict(type='AOTBlockNeck')):
    """Assemble the AOT network: encoder, decoder and dilation neck."""
    super().__init__()
    # Build each stage from its registry config; attribute assignment
    # order matches the original (encoder, decoder, dilation_neck).
    for attr_name, cfg in (('encoder', encoder),
                           ('decoder', decoder),
                           ('dilation_neck', dilation_neck)):
        setattr(self, attr_name, build_component(cfg))
def __init__(self,
             encoder=dict(type='GLEncoder'),
             decoder=dict(type='GLDecoder'),
             dilation_neck=dict(type='GLDilationNeck')):
    """Set up the Global&Local encoder, decoder and dilation neck."""
    super(GLEncoderDecoder, self).__init__()
    # Instantiate every submodule first, then register them on ``self``.
    encoder_module = build_component(encoder)
    decoder_module = build_component(decoder)
    neck_module = build_component(dilation_neck)
    self.encoder = encoder_module
    self.decoder = decoder_module
    self.dilation_neck = neck_module
def __init__(self,
             encoder=dict(type='GLEncoder'),
             decoder=dict(type='GLDecoder'),
             dilation_neck=dict(type='GLDilationNeck')):
    """Set up the Global&Local encoder-decoder with fp16 support flag."""
    super().__init__()
    # Register submodules in the original order: encoder, decoder, neck.
    for attr_name, cfg in zip(('encoder', 'decoder', 'dilation_neck'),
                              (encoder, decoder, dilation_neck)):
        setattr(self, attr_name, build_component(cfg))
    # support fp16
    self.fp16_enabled = False
def test_mlp_refiner():
    """Smoke-test MLPRefiner: build, forward shape, one optimizer step."""
    cfg = dict(
        type='MLPRefiner', in_dim=8, out_dim=3, hidden_list=[8, 8, 8, 8])
    refiner = build_component(cfg)
    # test attributes
    assert refiner.__class__.__name__ == 'MLPRefiner'
    # prepare data
    feats = torch.rand(2, 8)
    gt = torch.rand(2, 3)
    if torch.cuda.is_available():
        feats = feats.cuda()
        gt = gt.cuda()
        refiner = refiner.cuda()
    batch = {'in': feats, 'target': gt}
    # prepare optimizer
    loss_fn = nn.L1Loss()
    optim = torch.optim.Adam(refiner.parameters(), lr=1e-4)
    # test train_step
    pred = refiner.forward(batch['in'])
    assert pred.shape == batch['target'].shape
    loss = loss_fn(pred, batch['target'])
    optim.zero_grad()
    loss.backward()
    optim.step()
def __init__(self,
             encoder_attention=dict(
                 type='DeepFillEncoder', encoder_type='stage2_attention'),
             encoder_conv=dict(
                 type='DeepFillEncoder', encoder_type='stage2_conv'),
             dilation_neck=dict(
                 type='GLDilationNeck',
                 in_channels=128,
                 act_cfg=dict(type='ELU')),
             contextual_attention=dict(
                 type='ContextualAttentionNeck', in_channels=128),
             decoder=dict(type='DeepFillDecoder', in_channels=256)):
    """Build the DeepFill refinement stage from its component configs."""
    super(DeepFillRefiner, self).__init__()
    # Register submodules in the original attribute order so parameter
    # ordering is unchanged.
    component_cfgs = (
        ('encoder_attention', encoder_attention),
        ('encoder_conv', encoder_conv),
        ('contextual_attention_neck', contextual_attention),
        ('dilation_neck', dilation_neck),
        ('decoder', decoder),
    )
    for attr_name, cfg in component_cfgs:
        setattr(self, attr_name, build_component(cfg))
def __init__(self,
             stage1=dict(
                 type='GLEncoderDecoder',
                 encoder=dict(type='DeepFillEncoder'),
                 decoder=dict(type='DeepFillDecoder', in_channels=128),
                 dilation_neck=dict(
                     type='GLDilationNeck',
                     in_channels=128,
                     act_cfg=dict(type='ELU'))),
             stage2=dict(type='DeepFillRefiner'),
             return_offset=False):
    """Two-stage DeepFill network: coarse backbone + refinement component."""
    super(DeepFillEncoderDecoder, self).__init__()
    # The coarse stage is a full backbone; the refiner is a component.
    coarse = build_backbone(stage1)
    refine = build_component(stage2)
    self.stage1 = coarse
    self.stage2 = refine
    self.return_offset = return_offset
def test_light_cnn():
    """LightCNN builds, runs on CPU/GPU, and validates ``pretrained`` type."""
    model = build_component(dict(type='LightCNN', in_channels=3))
    model.init_weights(pretrained=None)
    # cpu
    imgs = torch.rand((2, 3, 128, 128))
    pred = model(imgs)
    assert pred.shape == (2, 1)
    # gpu
    if torch.cuda.is_available():
        model.init_weights(pretrained=None)
        model = model.cuda()
        pred = model(imgs.cuda())
        assert pred.shape == (2, 1)
    # pretrained should be str or None
    with pytest.raises(TypeError):
        model.init_weights(pretrained=[1])
def test_search_transformer():
    """SearchTransformer returns a soft-attention map and 3-level textures."""
    model = build_component(dict(type='SearchTransformer'))
    lr_pad_level3 = torch.randn((2, 32, 32, 32))
    ref_pad_level3 = torch.randn((2, 32, 32, 32))
    ref_pyramid = (torch.randn((2, 32, 32, 32)),
                   torch.randn((2, 16, 64, 64)),
                   torch.randn((2, 8, 128, 128)))
    s, textures = model(lr_pad_level3, ref_pad_level3, ref_pyramid)
    assert s.shape == (2, 1, 32, 32)
    # Each transferred texture keeps the shape of its reference level.
    expected_shapes = [(2, 32, 32, 32), (2, 16, 64, 64), (2, 8, 128, 128)]
    for texture, shape in zip(textures, expected_shapes):
        assert texture.shape == shape
def __init__(self,
             encoder,
             imnet,
             local_ensemble=True,
             feat_unfold=True,
             cell_decode=True,
             eval_bsize=None):
    """Build the encoder backbone and the implicit MLP (imnet).

    The imnet input width is derived from the encoder's ``mid_channels``:
    x9 when features are unfolded, +2 for coordinates, +2 more when cell
    decoding is enabled.
    """
    super().__init__()
    self.local_ensemble = local_ensemble
    self.feat_unfold = feat_unfold
    self.cell_decode = cell_decode
    self.eval_bsize = eval_bsize
    # model
    self.encoder = build_backbone(encoder)
    # 3x3 unfolding multiplies the channel count by 9.
    in_dim = self.encoder.mid_channels * (9 if feat_unfold else 1)
    in_dim += 2  # attach coordinates
    if cell_decode:
        in_dim += 2
    imnet['in_dim'] = in_dim
    self.imnet = build_component(imnet)
def __init__(self, encoder, decoder):
    """Build the partial-convolution encoder and decoder from configs."""
    super(PConvEncoderDecoder, self).__init__()
    # Register the two halves in encoder-then-decoder order.
    for attr_name, cfg in (('encoder', encoder), ('decoder', decoder)):
        setattr(self, attr_name, build_component(cfg))
def __init__(self, encoder, decoder):
    """Build encoder and decoder, wiring the encoder's output channels
    into the decoder config as ``in_channels``."""
    super(SimpleEncoderDecoder, self).__init__()
    enc = build_component(encoder)
    self.encoder = enc
    # The decoder config is mutated in place, matching original behavior.
    decoder['in_channels'] = enc.out_channels
    self.decoder = build_component(decoder)
def __init__(self,
             in_size,
             out_size,
             img_channels=3,
             rrdb_channels=64,
             num_rrdbs=23,
             style_channels=512,
             num_mlps=8,
             channel_multiplier=2,
             blur_kernel=[1, 3, 3, 1],
             lr_mlp=0.01,
             default_style_mode='mix',
             eval_style_mode='single',
             mix_prob=0.9,
             pretrained=None,
             bgr2rgb=False):
    """GLEAN-style super-resolution generator.

    Builds an RRDB-based encoder, a frozen StyleGANv2 "latent bank"
    generator, per-resolution fusion convolutions, and an upsampling
    decoder.

    Args:
        in_size (int): Input image size. Must be strictly smaller than
            ``out_size``; presumably a power of 2 (``np.log2`` is used to
            size the module lists) — TODO confirm.
        out_size (int): Output image size of the StyleGANv2 latent bank.
        img_channels (int): Channels of the input image. Default: 3.
        rrdb_channels (int): Feature channels of the RRDB extractor.
            Default: 64.
        num_rrdbs (int): Number of RRDB blocks. Default: 23.
        style_channels (int): StyleGANv2 style vector width. Default: 512.
        num_mlps (int): Number of MLP layers in the StyleGANv2 mapping
            network. Default: 8.
        channel_multiplier (int): StyleGANv2 channel multiplier.
            Default: 2.
        blur_kernel (list[int]): StyleGANv2 blur kernel. Default:
            [1, 3, 3, 1].
        lr_mlp (float): Learning-rate multiplier of the mapping network.
            Default: 0.01.
        default_style_mode (str): Style mixing mode in training.
            Default: 'mix'.
        eval_style_mode (str): Style mixing mode in evaluation.
            Default: 'single'.
        mix_prob (float): Probability of style mixing. Default: 0.9.
        pretrained (str | None): Pretrained weights for the latent bank.
        bgr2rgb (bool): Whether to swap color channels in the latent bank.

    Raises:
        ValueError: If ``in_size`` >= ``out_size``.
    """
    super().__init__()

    # input size must be strictly smaller than output size
    if in_size >= out_size:
        raise ValueError('in_size must be smaller than out_size, but got '
                         f'{in_size} and {out_size}.')

    # latent bank (StyleGANv2), with weights being fixed
    self.generator = build_component(
        dict(
            type='StyleGANv2Generator',
            out_size=out_size,
            style_channels=style_channels,
            num_mlps=num_mlps,
            channel_multiplier=channel_multiplier,
            blur_kernel=blur_kernel,
            lr_mlp=lr_mlp,
            default_style_mode=default_style_mode,
            eval_style_mode=eval_style_mode,
            mix_prob=mix_prob,
            pretrained=pretrained,
            bgr2rgb=bgr2rgb))
    # Freeze the latent bank: only encoder/fusion/decoder are trained.
    self.generator.requires_grad_(False)

    self.in_size = in_size
    self.style_channels = style_channels
    # Per-resolution channel table provided by the StyleGANv2 generator.
    channels = self.generator.channels

    # encoder
    # One style vector per generator layer: 2*log2(out_size) - 2.
    num_styles = int(np.log2(out_size)) * 2 - 2
    # Resolutions from in_size down to 4 (exclusive bound 1 => 2**2 = 4).
    encoder_res = [2**i for i in range(int(np.log2(in_size)), 1, -1)]
    self.encoder = nn.ModuleList()
    # First stage: RRDB feature extraction at the input resolution.
    self.encoder.append(
        nn.Sequential(
            RRDBFeatureExtractor(
                img_channels, rrdb_channels, num_blocks=num_rrdbs),
            nn.Conv2d(
                rrdb_channels, channels[in_size], 3, 1, 1, bias=True),
            nn.LeakyReLU(negative_slope=0.2, inplace=True)))
    # Downsampling stages; the final 4x4 stage flattens to style vectors.
    for res in encoder_res:
        in_channels = channels[res]
        if res > 4:
            out_channels = channels[res // 2]
            # Stride-2 conv halves the resolution.
            block = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 3, 2, 1, bias=True),
                nn.LeakyReLU(negative_slope=0.2, inplace=True),
                nn.Conv2d(out_channels, out_channels, 3, 1, 1, bias=True),
                nn.LeakyReLU(negative_slope=0.2, inplace=True))
        else:
            # 4x4 feature map (16 spatial positions) -> all style vectors.
            block = nn.Sequential(
                nn.Conv2d(in_channels, in_channels, 3, 1, 1, bias=True),
                nn.LeakyReLU(negative_slope=0.2, inplace=True),
                nn.Flatten(),
                nn.Linear(16 * in_channels, num_styles * style_channels))
        self.encoder.append(block)

    # additional modules for StyleGANv2
    # Fuse encoder features into the generator's feature/skip branches,
    # one conv pair per resolution (lowest resolution first).
    self.fusion_out = nn.ModuleList()
    self.fusion_skip = nn.ModuleList()
    for res in encoder_res[::-1]:
        num_channels = channels[res]
        self.fusion_out.append(
            nn.Conv2d(num_channels * 2, num_channels, 3, 1, 1, bias=True))
        self.fusion_skip.append(
            nn.Conv2d(num_channels + 3, 3, 3, 1, 1, bias=True))

    # decoder
    # Resolutions from in_size up to out_size (inclusive).
    decoder_res = [
        2**i
        for i in range(int(np.log2(in_size)), int(np.log2(out_size) + 1))
    ]
    self.decoder = nn.ModuleList()
    for res in decoder_res:
        if res == in_size:
            in_channels = channels[res]
        else:
            # Later stages also take the concatenated generator features.
            in_channels = 2 * channels[res]

        if res < out_size:
            out_channels = channels[res * 2]
            self.decoder.append(
                PixelShufflePack(
                    in_channels, out_channels, 2, upsample_kernel=3))
        else:
            # Final stage maps features back to the image space.
            self.decoder.append(
                nn.Sequential(
                    nn.Conv2d(in_channels, 64, 3, 1, 1),
                    nn.LeakyReLU(negative_slope=0.2, inplace=True),
                    nn.Conv2d(64, img_channels, 3, 1, 1)))