def __init__(self, dim, use_bias):
    """Residual block body: two (ReflectionPad -> 3x3 Conv -> InstanceNorm)
    stages, with a ReLU only after the first stage.

    Args:
        dim: number of input and output channels (kept equal).
        use_bias: forwarded to both convolutions as bias_attr.
    """
    super(ResnetBlock, self).__init__()
    stages = [
        # Stage 1: pad, conv, norm, activation.
        ReflectionPad2d(1),
        dygraph.Conv2D(dim, dim, 3, bias_attr=use_bias),
        dygraph.InstanceNorm(dim),
        ReLU(True),
        # Stage 2: same as stage 1 but without the trailing activation.
        ReflectionPad2d(1),
        dygraph.Conv2D(dim, dim, 3, bias_attr=use_bias),
        dygraph.InstanceNorm(dim),
    ]
    self.conv_block = dygraph.Sequential(*stages)
def __init__(self, in_features, kernel_size, padding, **kwargs):
    """Residual block components: two same-width convs, each paired with a
    BatchNorm using momentum 0.1.

    Args:
        in_features: channel count (input == output).
        kernel_size, padding: conv geometry, shared by both convs.
        **kwargs: forwarded to the parent Layer.
    """
    super(ResBlock2d, self).__init__(**kwargs)
    conv_cfg = dict(num_channels=in_features,
                    num_filters=in_features,
                    filter_size=kernel_size,
                    padding=padding)
    self.conv1 = dygraph.Conv2D(**conv_cfg)
    self.conv2 = dygraph.Conv2D(**conv_cfg)
    self.norm1 = dygraph.BatchNorm(num_channels=in_features, momentum=0.1)
    self.norm2 = dygraph.BatchNorm(num_channels=in_features, momentum=0.1)
def __init__(self, block, layers, num_classes=65, groups=1, width_per_group=64):
    """Compact ResNet variant: 1-channel stem plus two residual stages.

    Args:
        block: residual block class (provides `.expansion`).
        layers: per-stage block counts; only the first two entries are used.
        num_classes: output dimension of the final Linear head.
        groups, width_per_group: forwarded bookkeeping for _make_layer.
    """
    super(MyCNN, self).__init__()
    self._norm_layer = dg.BatchNorm
    norm_layer = self._norm_layer
    self.inplanes = 64
    self.dilation = 1
    self.groups = groups
    self.base_width = width_per_group
    # Stem: resolution-preserving 3x3 conv, BN with gamma initialized to 1,
    # then a stride-2 max pool.
    self.conv1 = dg.Conv2D(1, self.inplanes, filter_size=3, stride=1, padding=1)
    self.bn1 = norm_layer(
        self.inplanes,
        param_attr=fluid.ParamAttr(
            initializer=fluid.initializer.Constant(1.0)))
    self.maxpool = dg.Pool2D(pool_size=3, pool_stride=2, pool_padding=1)
    # Only two residual stages are built here (deeper stages omitted).
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.avgpool = dg.Pool2D(5, pool_type="avg")
    self.fc = Linear(128 * block.expansion, num_classes)
def conv1x1(in_planes, out_planes, stride=1):
    """1x1 (pointwise) convolution.

    Fix: the previous body created a 3x3 conv with padding=1 despite the
    function name and docstring — evidently copy-pasted from conv3x3. A true
    1x1 conv uses filter_size=1 and no padding, matching the torchvision
    convention this helper mirrors.

    Args:
        in_planes: input channel count.
        out_planes: output channel count.
        stride: convolution stride.
    Returns:
        A dg.Conv2D layer implementing a pointwise convolution.
    """
    return dg.Conv2D(in_planes, out_planes, filter_size=1, stride=stride)
def __init__(self, backbone, transformer, num_classes, num_queries, aux_loss=False):
    """Initializes the model.

    Parameters:
        backbone: See backbone.py
        transformer: See transformer.py
        num_classes: number of object classes
        num_queries: number of object queries, i.e. the detection slots —
            the maximal number of objects detectable in a single image
            (100 recommended for COCO).
        aux_loss: True to enable auxiliary decoding losses (a loss at each
            decoder layer).
    """
    super().__init__()
    hidden_dim = transformer.d_model
    self.num_queries = num_queries
    self.transformer = transformer
    # Classification head: one extra logit beyond the object classes.
    self.class_embed = dg.Linear(hidden_dim, num_classes + 1)
    # 3-layer MLP regressing the 4 box values.
    self.bbox_embed = MLP(hidden_dim, hidden_dim, 4, 3)
    # Learned query embeddings, one per detection slot.
    self.query_embed = dg.Embedding((num_queries, hidden_dim))
    # 1x1 conv mapping backbone channels onto the transformer width.
    self.input_proj = dg.Conv2D(backbone.num_channels, hidden_dim, filter_size=1)
    self.backbone = backbone
    self.aux_loss = aux_loss
def __init__(self, dim, use_bias):
    """Residual block with adaptive ILN normalization: two stages of
    ReflectionPad -> 3x3 Conv -> adaILN, with ReLU after the first stage only.

    Args:
        dim: channel count (input == output).
        use_bias: forwarded to both convolutions as bias_attr.
    """
    super(ResnetAdaILNBlock, self).__init__()
    # First stage: pad, conv, adaptive ILN, activation.
    self.pad1 = ReflectionPad2d(1)
    self.conv1 = dygraph.Conv2D(dim, dim, 3, bias_attr=use_bias)
    self.norm1 = adaILN(dim)
    self.relu1 = ReLU(True)
    # Second stage: same layers, no trailing activation here.
    self.pad2 = ReflectionPad2d(1)
    self.conv2 = dygraph.Conv2D(dim, dim, 3, bias_attr=use_bias)
    self.norm2 = adaILN(dim)
def conv2d(in_channels, out_channels, kernel_size, stride, padding):
    """Plain Conv2D factory with explicit geometry arguments."""
    cfg = dict(num_channels=in_channels,
               num_filters=out_channels,
               filter_size=kernel_size,
               stride=stride,
               padding=padding)
    return dg.Conv2D(**cfg)
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding that tracks the dilation rate."""
    cfg = dict(filter_size=3,
               stride=stride,
               padding=dilation,  # keeps "same" behavior under dilation
               groups=groups,
               dilation=dilation)
    return dg.Conv2D(in_planes, out_planes, **cfg)
def __init__(self, block_expansion, num_kp, num_channels, max_features,
             num_blocks, temperature, estimate_jacobian=False, scale_factor=1,
             single_jacobian_map=False, pad=0):
    """Keypoint detector: hourglass feature extractor plus 7x7 conv heads for
    keypoint heatmaps and, optionally, per-keypoint 2x2 jacobian maps.

    Args:
        block_expansion: channel expansion factor for the hourglass.
        num_kp: number of keypoints to predict.
        num_channels: input image channel count.
        max_features: cap on hourglass feature channels.
        num_blocks: hourglass depth.
        temperature: stored for later use (presumably a softmax temperature
            applied to the heatmaps in forward — confirm there).
        estimate_jacobian: if True, add a head predicting 4 values (a
            flattened 2x2 matrix) per jacobian map.
        scale_factor: if != 1, a `down` anti-alias layer is created
            (presumably applied to inputs in forward).
        single_jacobian_map: share one jacobian map across all keypoints.
        pad: padding for both 7x7 heads.
    """
    super(KPDetector, self).__init__()
    self.predictor = Hourglass(block_expansion, in_features=num_channels,
                               max_features=max_features, num_blocks=num_blocks)
    # Heatmap head: one output channel per keypoint.
    self.kp = dygraph.Conv2D(num_channels=self.predictor.out_filters,
                             num_filters=num_kp, filter_size=(7, 7),
                             padding=pad)
    if estimate_jacobian:
        self.num_jacobian_maps = 1 if single_jacobian_map else num_kp
        # 4 outputs per map = the flattened 2x2 jacobian.
        self.jacobian = dygraph.Conv2D(
            num_channels=self.predictor.out_filters,
            num_filters=4 * self.num_jacobian_maps,
            filter_size=(7, 7), padding=pad)
        # Initialize so every predicted jacobian starts as the identity:
        # zero weights, bias = [1, 0, 0, 1] repeated per map.
        self.jacobian.weight.set_value(
            np.zeros(list(self.jacobian.weight.shape), dtype=np.float32))
        self.jacobian.bias.set_value(
            np.array([1, 0, 0, 1] * self.num_jacobian_maps, dtype=np.float32))
    else:
        self.jacobian = None
    self.temperature = temperature
    self.scale_factor = scale_factor
    if self.scale_factor != 1:
        self.down = AntiAliasInterpolation2d(num_channels, self.scale_factor)
def __init__(self, in_channel, out_channel, kernel_size=[3, 3], padding=1,
             stride=1, n_class=None, conditional=True, activation=layers.relu,
             upsample=True, downsample=False, z_dim=128, use_attention=False,
             skip_proj=None):
    """GAN residual block: spectral-normalized convs, optional conditional
    batch norm (conditioned on a z_dim vector), optional self-attention, and
    a 1x1 skip projection when the block resamples.

    NOTE(review): kernel_size has a mutable default ([3, 3]); it appears to be
    read-only here, but a tuple default would be safer — verify before changing.
    """
    super().__init__()
    if conditional:
        self.cond_norm1 = ConditionalBatchNorm(in_channel, z_dim)
    self.conv0 = SpectralNorm(
        dg.Conv2D(in_channel, out_channel, kernel_size, stride, padding))
    if conditional:
        self.cond_norm2 = ConditionalBatchNorm(out_channel, z_dim)
    self.conv1 = SpectralNorm(
        dg.Conv2D(out_channel, out_channel, kernel_size, stride, padding))
    self.skip_proj = False
    # NOTE(review): `skip_proj is not True` means any value other than the
    # literal True (including the default None) enables the projection when
    # the block up/downsamples — confirm this inverted flag is intentional.
    if skip_proj is not True and (upsample or downsample):
        self.conv_sc = SpectralNorm(
            dg.Conv2D(in_channel, out_channel, 1, 1, 0))
        self.skip_proj = True
    if use_attention:
        self.attention = SelfAttention(out_channel)
    self.upsample = upsample
    self.downsample = downsample
    self.activation = activation
    self.conditional = conditional
    self.use_attention = use_attention
def __init__(self, in_dim, activation=layers.relu):
    """Self-attention module over feature maps with `in_dim` channels,
    built from 1x1 spectral-normalized projections."""
    super().__init__()
    self.chanel_in = in_dim
    self.activation = activation

    def sn_conv1x1(c_in, c_out):
        # Bias-free 1x1 conv wrapped in spectral normalization.
        return SpectralNorm(dg.Conv2D(c_in, c_out, 1, bias_attr=False))

    # Creation order matches the original (parameter naming/state order).
    self.theta = sn_conv1x1(in_dim, in_dim // 8)
    self.phi = sn_conv1x1(in_dim, in_dim // 8)
    self.pool = dg.Pool2D(2, 'max', 2)
    self.g = sn_conv1x1(in_dim, in_dim // 2)
    self.o_conv = sn_conv1x1(in_dim // 2, in_dim)
    # Learnable residual gate, zero-initialized.
    self.gamma = self.create_parameter([1, ],
                                       default_initializer=Constant(0.0))
    self.softmax = SoftMax(axis=-1)
def __init__(self, in_features, out_features, groups=1, kernel_size=3, padding=1):
    """Conv + BatchNorm block (default kernel/padding keep spatial size)."""
    super(SameBlock2d, self).__init__()
    self.conv = dygraph.Conv2D(num_channels=in_features,
                               num_filters=out_features,
                               filter_size=kernel_size,
                               padding=padding,
                               groups=groups)
    self.norm = dygraph.BatchNorm(out_features)
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
             groups=1, width_per_group=64, replace_stride_with_dilation=None,
             norm_layer=None):
    """torchvision-style ResNet ported to Paddle dygraph.

    Args:
        block: residual block class (BasicBlock or Bottleneck; provides
            `.expansion`).
        layers: 4-element list of block counts per stage.
        num_classes: output size of the final Linear head.
        zero_init_residual: attempt to zero-init the last BN of each block.
        groups, width_per_group: grouped-conv bookkeeping for _make_layer.
        replace_stride_with_dilation: 3 flags, one per stage 2/3/4.
        norm_layer: normalization layer class; defaults to dg.BatchNorm.
    Raises:
        ValueError: if replace_stride_with_dilation is not None or length 3.
    """
    super(ResNet, self).__init__()
    if norm_layer is None:
        norm_layer = dg.BatchNorm
    self._norm_layer = norm_layer
    self.inplanes = 64
    self.dilation = 1
    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
    self.groups = groups
    self.base_width = width_per_group
    # Stem: 7x7 stride-2 conv (no bias), BN, ReLU, stride-2 max pool.
    self.conv1 = dg.Conv2D(3, self.inplanes, filter_size=7, stride=2,
                           padding=3, bias_attr=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = ReLU()
    self.maxpool = dg.Pool2D(pool_size=3, pool_type='max', pool_stride=2,
                             pool_padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                   dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                   dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                   dilate=replace_stride_with_dilation[2])
    # Global average pool to 1x1, then the classification head.
    self.avgpool = lambda x: L.adaptive_pool2d(x, (1, 1), pool_type='avg')
    self.fc = dg.Linear(512 * block.expansion, num_classes)
    for m in self.sublayers():
        if isinstance(m, dg.Conv2D):
            # NOTE(review): assigning param_attr on an already-constructed
            # layer likely does NOT re-initialize its weights in Paddle
            # dygraph (param_attr is consumed at construction) — verify; if
            # so, this whole init loop is a no-op.
            m.param_attr = F.ParamAttr(initializer=F.initializer.MSRAInitializer())
        elif isinstance(m, (dg.BatchNorm, dg.GroupNorm)):
            m.param_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=0.0))
            m.bias_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=0.0))
    # Zero-initialize the last BN in each residual branch, so that the
    # residual branch starts with zeros and each block behaves like an
    # identity. This improves the model by 0.2~0.3% according to
    # https://arxiv.org/abs/1706.02677
    # NOTE(review): same post-construction param_attr caveat as above.
    if zero_init_residual:
        for m in self.sublayers():
            if isinstance(m, Bottleneck):
                m.bn3.param_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=0.0))
            elif isinstance(m, BasicBlock):
                m.bn2.param_attr = F.ParamAttr(initializer=F.initializer.ConstantInitializer(value=0.0))
def conv2d(in_channels, out_channels, kernel_size, stride, padding, group=1,
           bias=None):
    """Conv2D factory exposing grouped convolution and a bias attribute."""
    options = dict(num_channels=in_channels,
                   num_filters=out_channels,
                   filter_size=kernel_size,
                   stride=stride,
                   padding=padding,
                   groups=group,
                   bias_attr=bias)
    return dg.Conv2D(**options)
def __init__(self, block_expansion, num_blocks, max_features, num_kp,
             num_channels, estimate_occlusion_map=False, scale_factor=1,
             kp_variance=0.01, **kwargs):
    """Hourglass network predicting per-keypoint masks (and optionally an
    occlusion map) from stacked keypoint/image features.

    Args:
        block_expansion, num_blocks, max_features: hourglass configuration.
        num_kp: number of keypoints; the mask head emits num_kp + 1 channels
            (one extra for background).
        num_channels: image channel count.
        estimate_occlusion_map: add a 1-channel occlusion head when True.
        scale_factor: if != 1, build an anti-alias downscaling layer.
        kp_variance: stored for later use.
    """
    super(DenseMotionNetwork, self).__init__(**kwargs)
    self.hourglass = Hourglass(
        block_expansion=block_expansion,
        in_features=(num_kp + 1) * (num_channels + 1),
        max_features=max_features,
        num_blocks=num_blocks)
    # One mask channel per keypoint, plus one for background.
    self.mask = dygraph.Conv2D(self.hourglass.out_filters, num_kp + 1,
                               filter_size=(7, 7), padding=(3, 3))
    self.occlusion = (dygraph.Conv2D(self.hourglass.out_filters, 1,
                                     filter_size=(7, 7), padding=(3, 3))
                      if estimate_occlusion_map else None)
    self.num_kp = num_kp
    self.scale_factor = scale_factor
    self.kp_variance = kp_variance
    if self.scale_factor != 1:
        self.down = AntiAliasInterpolation2d(num_channels, self.scale_factor)
def __init__(self):
    """Tiny classifier: one ReLU conv followed by a two-layer MLP head."""
    super(MNIST, self).__init__()
    # 3-channel input squeezed to a single feature map.
    self.cnn = dy.Conv2D(num_channels=3, num_filters=1, filter_size=3,
                         stride=1, padding=1, act='relu')
    head = [
        dy.Linear(input_dim=784, output_dim=128),
        dy.Dropout(p=.2),
        dy.Linear(input_dim=128, output_dim=5),
    ]
    self.cls = dy.Sequential(*head)
def __init__(self, input_channels, num_filter, groups, name=None,
             use_bias=False):
    """VGG-style conv group: `groups` stacked 3x3 ReLU convs of width
    num_filter, with parameters named "<name><idx>_weights" / "<name><idx>_bias"
    (idx starting at 1).

    Args:
        input_channels: channels into the first conv.
        num_filter: channel width of every conv in the group.
        groups: number of convs; if 1, only self.conv_in exists.
        name: prefix for parameter names (required — concatenated below).
        use_bias: when False, convs are built without bias.
    """
    super(conv_block, self).__init__()
    self._layers = []
    i = 0
    # First conv maps input_channels -> num_filter; named "<name>1_*".
    self.conv_in = dygraph.Conv2D(
        num_channels=input_channels,
        num_filters=num_filter,
        filter_size=3,
        stride=1,
        padding=1,
        act='relu',
        param_attr=fluid.param_attr.ParamAttr(name=name + str(i + 1) + "_weights"),
        bias_attr=False if not use_bias else fluid.param_attr.ParamAttr(
            name=name + str(i + 1) + "_bias"))
    if groups == 1:
        # Single-conv group: note self.conv (the Sequential) is never created
        # in this case, so callers must use conv_in alone.
        return
    # Remaining convs keep num_filter -> num_filter; names continue at
    # "<name>2_*" and up.
    for i in range(1, groups):
        _a = dygraph.Conv2D(
            num_channels=num_filter,
            num_filters=num_filter,
            filter_size=3,
            stride=1,
            padding=1,
            act='relu',
            param_attr=fluid.param_attr.ParamAttr(name=name + str(i + 1) + "_weights"),
            bias_attr=False if not use_bias else fluid.param_attr.ParamAttr(name=name + str(i + 1) + "_bias"))
        self._layers.append(_a)
    self.conv = dygraph.Sequential(*self._layers)
def __init__(self):
    """Three identical conv/BN/dropout stages followed by a 2-layer MLP head."""
    super(HarFcn, self).__init__()

    def stage(c_in):
        # Conv -> BatchNorm -> Dropout with a fixed width of 128 filters.
        return dy.Sequential(
            dy.Conv2D(num_channels=c_in, num_filters=128, filter_size=3,
                      stride=1, padding=1),
            dy.BatchNorm(num_channels=128),
            dy.Dropout(p=.2),
        )

    self.cnn1 = stage(1)
    self.cnn2 = stage(128)
    self.cnn3 = stage(128)
    self.cls = dy.Sequential(
        dy.Linear(input_dim=384, output_dim=128),
        dy.Dropout(p=.2),
        dy.Linear(input_dim=128, output_dim=5),
    )
def __init__(self, in_channels, out_channels, kernel_size=3, stride=1,
             padding=1, act='relu'):
    """Bias-free conv followed by a BatchNorm that carries the activation."""
    super().__init__()
    self.conv = dg.Conv2D(num_channels=in_channels,
                          num_filters=out_channels,
                          filter_size=kernel_size,
                          stride=stride,
                          padding=padding,
                          bias_attr=False)
    # The activation is fused into the BatchNorm layer via `act`.
    self.bn = dg.BatchNorm(num_channels=out_channels, act=act)
def convbn(in_channel, out_channel, padding=None, stride=1, kernel=3, act=None):
    """Conv2D followed by BatchNorm, wrapped in a Sequential.

    Args:
        in_channel, out_channel: channel counts.
        padding: explicit padding; when None, defaults to the "same" padding
            for odd kernels, (kernel - 1) // 2.
        stride, kernel, act: forwarded to the conv.
    Returns:
        fluid.dygraph.Sequential of [Conv2D, BatchNorm].

    Fix: compare against None with `is` (PEP 8) rather than `==`, and compute
    the default padding with integer floor division instead of int(.../2).
    """
    if padding is None:
        padding = (kernel - 1) // 2
    return fluid.dygraph.Sequential(
        dygraph.Conv2D(in_channel, out_channel, kernel, stride=stride,
                       padding=padding, act=act),
        dygraph.BatchNorm(out_channel))
def __init__(self, in_features, out_features, kernel_size=3, padding=1, groups=1):
    """Conv -> BatchNorm -> 2x2 average pool (halves spatial resolution)."""
    super(DownBlock2d, self).__init__()
    self.conv = dygraph.Conv2D(num_channels=in_features,
                               num_filters=out_features,
                               filter_size=kernel_size,
                               padding=padding,
                               groups=groups)
    self.norm = dygraph.BatchNorm(num_channels=out_features, momentum=0.1)
    self.pool = dygraph.Pool2D(pool_size=(2, 2), pool_type='avg',
                               pool_stride=2)
def __init__(self, num_channels, num_filters, filter_size, padding,
             use_cudnn=False):
    """Tanh-activated convolution.

    NOTE(review): despite the ConvPoolLayer name, only a conv is built here —
    pooling presumably happens elsewhere (e.g. in forward); confirm.
    """
    super(ConvPoolLayer, self).__init__()
    self._conv2d = D.Conv2D(num_channels=num_channels,
                            num_filters=num_filters,
                            filter_size=filter_size,
                            padding=padding,
                            use_cudnn=use_cudnn,
                            act='tanh')
def __init__(self, code_dim=128, n_class=1000, chn=96,
             blocks_with_attention="B4", resolution=512):
    """BigGAN-style generator: class-embedding + split-noise conditioning,
    a stack of conditional ResBlocks (some with self-attention), and a final
    BN + 3-channel output conv.

    Args:
        code_dim: total noise vector length; split evenly across blocks.
        n_class: number of classes for the embedding.
        chn: base channel multiplier.
        blocks_with_attention: comma-separated "B<k>" tokens naming which
            (1-based) blocks get self-attention.
        resolution: target output resolution (stored; channel plan comes
            from get_in_out_channels()).
    """
    super().__init__()

    def GBlock(in_channel, out_channel, n_class, z_dim, use_attention):
        # Thin alias over ResBlock with keyword conditioning args.
        return ResBlock(in_channel, out_channel, n_class=n_class,
                        z_dim=z_dim, use_attention=use_attention)

    self.embed_y = dg.Linear(n_class, 128, bias_attr=False)
    self.chn = chn
    self.resolution = resolution
    # "B2,B4" -> {"B2", "B4"}; discard('') copes with an empty spec string.
    self.blocks_with_attention = set(blocks_with_attention.split(","))
    self.blocks_with_attention.discard('')
    gblock = []
    in_channels, out_channels = self.get_in_out_channels()
    self.num_split = len(in_channels) + 1
    # Per-block conditioning = one noise split plus the 128-d class embedding.
    z_dim = code_dim // self.num_split + 128
    # First noise split seeds the initial 4x4 feature map.
    self.noise_fc = SpectralNorm(
        dg.Linear(code_dim // self.num_split, 4 * 4 * in_channels[0]))
    # Parse "B<k>" tokens into 1-based block indices that get attention.
    self.sa_ids = [
        int(s.split('B')[-1]) for s in self.blocks_with_attention
    ]
    for i, (nc_in, nc_out) in enumerate(zip(in_channels, out_channels)):
        gblock.append(
            GBlock(nc_in, nc_out, n_class=n_class, z_dim=z_dim,
                   use_attention=(i + 1) in self.sa_ids))
    self.blocks = dg.LayerList(gblock)
    self.output_layer_bn = BatchNorm(1 * chn, epsilon=1e-5)
    self.output_layer_conv = SpectralNorm(
        dg.Conv2D(1 * chn, 3, [3, 3], padding=1))
def Conv2D(num_channels, num_filters, filter_size, stride=1, padding=0,
           dilation=1, groups=1, param_attr=None, bias_attr=None,
           use_cudnn=True, act=None, dtype='float32'):
    """Conv2D layer wrapped with weight normalization along dim 0."""
    base = dg.Conv2D(num_channels, num_filters, filter_size, stride, padding,
                     dilation, groups, param_attr, bias_attr, use_cudnn, act,
                     dtype)
    return WeightNormWrapper(base, dim=0)
def __init__(self, channels, scale):
    """Depthwise gaussian blur used before downsampling to reduce aliasing.

    Builds a fixed (non-trainable) gaussian kernel whose size and sigma are
    derived from `scale`, and a depthwise Conv2D (groups == channels) that
    applies it.
    """
    super(AntiAliasInterpolation2d, self).__init__()
    sigma = (1 / scale - 1) / 2
    kernel_size = 2 * round(sigma * 4) + 1
    self.ka = kernel_size // 2
    # NOTE(review): kernel_size = 2*round(...)+1 is always odd, so the even
    # branch here appears unreachable — confirm before simplifying.
    self.kb = self.ka - 1 if kernel_size % 2 == 0 else self.ka
    kernel_size = [kernel_size, kernel_size]
    sigma = [sigma, sigma]
    # The 2-D gaussian kernel is the product of the 1-D gaussian of each
    # dimension.
    kernel = 1
    # Kernel needs no backprop; it is built on CPU with numpy.
    meshgrids = np.meshgrid(
        *[np.arange(size, dtype=np.float32) for size in kernel_size])
    meshgrids = [i.T for i in meshgrids]
    for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
        mean = (size - 1) / 2
        kernel *= np.exp(-(mgrid - mean)**2 / (2 * std**2))
    # Normalize so the kernel sums to 1 (preserves mean intensity).
    kernel = kernel / np.sum(kernel)
    # Reshape to a depthwise conv weight:
    # [kh, kw] -> [1, 1, kh, kw] -> [channels, 1, kh, kw]
    kernel = kernel.reshape(1, 1, *kernel.shape)
    kernel = kernel.repeat(channels, 0)
    # Frozen parameter attr so the kernel is never updated by training.
    self.kernel_attr = fluid.ParamAttr(
        initializer=fluid.initializer.NumpyArrayInitializer(kernel),
        trainable=False)
    self.kernel = self.create_parameter(kernel.shape,
                                        attr=self.kernel_attr,
                                        dtype="float32")
    self.groups = channels
    self.scale = scale
    self.conv = dygraph.Conv2D(channels,
                               channels,
                               filter_size=kernel.shape[-1],
                               groups=self.groups,
                               param_attr=self.kernel_attr,
                               bias_attr=False)
    # Explicitly load the numpy kernel into the conv weight.
    self.conv.weight.set_value(kernel)
def convpool(in_channel, out_channel, padding=None, pooling=2, kernel=3,
             act='relu'):
    """Conv2D + BatchNorm, optionally followed by max pooling.

    Args:
        in_channel, out_channel: channel counts.
        padding: explicit padding; defaults to "same" padding for odd
            kernels, (kernel - 1) // 2, when None.
        pooling: pool size and stride; values <= 1 disable the pool layer.
        kernel, act: forwarded to the conv.
    Returns:
        fluid.dygraph.Sequential of the assembled layers.

    Fix: compare against None with `is` (PEP 8) rather than `==`, and use
    integer floor division for the default padding.
    """
    if padding is None:
        padding = (kernel - 1) // 2
    layers = [
        dygraph.Conv2D(in_channel, out_channel, kernel, padding=padding,
                       act=act),
        dygraph.BatchNorm(out_channel)
    ]
    if pooling > 1:
        layers.append(dygraph.Pool2D(pooling, pool_stride=pooling))
    return fluid.dygraph.Sequential(*layers)
def __init__(self, input_channels, output_channels, kernel_size,
             gain=2**(0.5), use_wscale=False, lrmul=1.0, bias=True):
    """Conv2D with StyleGAN-style (intended) equalized-learning-rate init:
    He std = gain / sqrt(fan_in), with a per-layer runtime multiplier
    w_lrmul.
    """
    super().__init__()
    self.kernel_size = kernel_size
    self.output_channels = output_channels
    he_std = gain * (input_channels * kernel_size**2)**(-0.5)  # He init
    if use_wscale:
        init_std = 1.0 / lrmul
        self.w_lrmul = he_std * lrmul
    else:
        init_std = he_std / lrmul
        self.w_lrmul = lrmul
    # NOTE(review): init_std is computed above but never used — the weights
    # below are always scaled by he_std. Reference wscale implementations
    # draw with init_std; confirm whether this is a bug.
    w = np.random.randn(output_channels, input_channels, kernel_size,
                        kernel_size) * he_std
    self.weight_attr = fluid.ParamAttr(
        initializer=fluid.initializer.NumpyArrayInitializer(w))
    if bias:
        self.b_lrmul = lrmul
        # NOTE(review): bias is drawn randomly and pre-scaled by b_lrmul at
        # init; many ports start bias at zero and apply b_lrmul at runtime —
        # verify against the intended reference.
        b = np.random.randn(output_channels) * self.b_lrmul
        self.bias_attr = fluid.ParamAttr(
            initializer=fluid.initializer.NumpyArrayInitializer(b))
    else:
        self.bias_attr = False
    self.conv2d = dygraph.Conv2D(input_channels,
                                 output_channels,
                                 kernel_size,
                                 padding=kernel_size // 2,  # "same" padding
                                 param_attr=self.weight_attr,
                                 bias_attr=self.bias_attr)
def __init__(self, num_channels=3, block_expansion=64, num_blocks=4,
             max_features=512, sn=False, use_kp=False, num_kp=10,
             kp_variance=0.01, **kwargs):
    """Patch-style discriminator: a chain of DownBlock2d layers followed by
    a 1x1 conv producing a single-channel score map.

    Args:
        num_channels: input image channels.
        block_expansion: base channel width; doubled per block, capped by
            max_features.
        num_blocks: number of DownBlock2d stages.
        max_features: upper bound on per-stage channels.
        sn: apply spectral normalization to the blocks and final conv.
        use_kp: if True, the first block also takes num_kp heatmap channels.
        num_kp, kp_variance: keypoint-conditioning settings (stored).
    """
    super(Discriminator, self).__init__()
    down_blocks = []
    for i in range(num_blocks):
        down_blocks.append(
            DownBlock2d(
                # First block input: image channels plus optional keypoint
                # heatmaps (num_kp * use_kp is 0 when use_kp is False).
                num_channels + num_kp * use_kp if i == 0 else min(
                    max_features, block_expansion * (2**i)),
                min(max_features, block_expansion * (2**(i + 1))),
                norm=(i != 0),        # no norm on the first block
                kernel_size=4,
                pool=(i != num_blocks - 1),  # last block keeps resolution
                sn=sn))
    self.down_blocks = dygraph.LayerList(down_blocks)
    # Final 1x1 conv; its input width is read off the last block's conv
    # weight shape (parameters()[0].shape[0] — assumes the first parameter
    # is the weight with out-channels first).
    self.conv = dygraph.Conv2D(
        self.down_blocks[len(self.down_blocks) - 1].conv.parameters()[0].shape[0],
        1,
        filter_size=1)
    if sn:
        self.sn = dygraph.SpectralNorm(self.conv.parameters()[0].shape, dim=0)
    else:
        self.sn = None
    self.use_kp = use_kp
    self.kp_variance = kp_variance
def __init__(self, in_features, out_features, norm=False, kernel_size=4,
             pool=False, sn=False):
    """Discriminator down block: conv plus optional spectral norm and
    optional instance norm; `pool` is stored as a flag only."""
    super(DownBlock2d, self).__init__()
    self.conv = dygraph.Conv2D(in_features, out_features,
                               filter_size=kernel_size)
    self.sn = (dygraph.SpectralNorm(self.conv.weight.shape, dim=0)
               if sn else None)
    self.norm = (dygraph.InstanceNorm(num_channels=out_features,
                                      epsilon=1e-05, dtype='float32')
                 if norm else None)
    self.pool = pool
def __init__(self, config):
    """One flow step of a WaveFlow/WaveGlow-style vocoder: a 1x1 start conv
    lifting audio to n_channels, a stack of dilated weight-normalized conv
    layers with mel-spectrogram conditioning, and a final 1x1 conv producing
    the 2 affine-coupling outputs.

    Args:
        config: object providing n_layers, n_channels, kernel_h, kernel_w,
            use_fp16, n_group, and mel_bands.
    Raises:
        KeyError: if config.n_group is not one of the supported squeeze sizes.
    """
    super(Flow, self).__init__()
    self.n_layers = config.n_layers
    self.n_channels = config.n_channels
    self.kernel_h = config.kernel_h
    self.kernel_w = config.kernel_w
    self.dtype = "float16" if config.use_fp16 else "float32"

    # Transform audio: [batch, 1, n_group, time/n_group]
    # => [batch, n_channels, n_group, time/n_group]
    param_attr, bias_attr = get_param_attr("weight_norm", (1, 1), c_in=1)
    self.start = weight_norm.Conv2D(num_channels=1,
                                    num_filters=self.n_channels,
                                    filter_size=(1, 1),
                                    param_attr=param_attr,
                                    bias_attr=bias_attr,
                                    dtype=self.dtype)

    # Initializing the last layer to 0 makes the affine coupling layers
    # do nothing at first, which helps training stability.
    # Output shape: [batch, 2, n_group, time/n_group]
    param_attr, bias_attr = get_param_attr("common", (1, 1),
                                           c_in=self.n_channels)
    self.end = dg.Conv2D(num_channels=self.n_channels,
                         num_filters=2,
                         filter_size=(1, 1),
                         param_attr=param_attr,
                         bias_attr=bias_attr,
                         dtype=self.dtype)

    # Receptive fields: (kernel - 1) * sum(dilations) + 1 >= squeeze size,
    # so each n_group has its own per-layer height-dilation schedule.
    dilation_dict = {
        8: [1, 1, 1, 1, 1, 1, 1, 1],
        16: [1, 1, 1, 1, 1, 1, 1, 1],
        32: [1, 2, 4, 1, 2, 4, 1, 2],
        64: [1, 2, 4, 8, 16, 1, 2, 4],
        128: [1, 2, 4, 8, 16, 32, 64, 1]
    }
    self.dilation_h_list = dilation_dict[config.n_group]

    self.in_layers = []
    self.cond_layers = []
    self.res_skip_layers = []
    for i in range(self.n_layers):
        dilation_h = self.dilation_h_list[i]
        dilation_w = 2**i  # exponential dilation along the time axis

        # Gated conv input: 2*n_channels outputs (filter and gate halves —
        # presumably split in forward; confirm there).
        param_attr, bias_attr = get_param_attr(
            "weight_norm", (self.kernel_h, self.kernel_w),
            c_in=self.n_channels)
        in_layer = weight_norm.Conv2D(num_channels=self.n_channels,
                                      num_filters=2 * self.n_channels,
                                      filter_size=(self.kernel_h, self.kernel_w),
                                      dilation=(dilation_h, dilation_w),
                                      param_attr=param_attr,
                                      bias_attr=bias_attr,
                                      dtype=self.dtype)
        self.in_layers.append(in_layer)

        # Mel conditioning projected to match the gated conv output width.
        param_attr, bias_attr = get_param_attr("weight_norm", (1, 1),
                                               c_in=config.mel_bands)
        cond_layer = weight_norm.Conv2D(num_channels=config.mel_bands,
                                        num_filters=2 * self.n_channels,
                                        filter_size=(1, 1),
                                        param_attr=param_attr,
                                        bias_attr=bias_attr,
                                        dtype=self.dtype)
        self.cond_layers.append(cond_layer)

        # All but the last layer emit residual + skip channels.
        if i < self.n_layers - 1:
            res_skip_channels = 2 * self.n_channels
        else:
            res_skip_channels = self.n_channels
        param_attr, bias_attr = get_param_attr("weight_norm", (1, 1),
                                               c_in=self.n_channels)
        res_skip_layer = weight_norm.Conv2D(num_channels=self.n_channels,
                                            num_filters=res_skip_channels,
                                            filter_size=(1, 1),
                                            param_attr=param_attr,
                                            bias_attr=bias_attr,
                                            dtype=self.dtype)
        self.res_skip_layers.append(res_skip_layer)

        # Register explicitly: plain Python lists are not tracked as
        # sublayers by the framework.
        self.add_sublayer("in_layer_{}".format(i), in_layer)
        self.add_sublayer("cond_layer_{}".format(i), cond_layer)
        self.add_sublayer("res_skip_layer_{}".format(i), res_skip_layer)