def __init__(self, cf, n_class=21, pretrained=False, net_name='fcn16'):
    """Build the FCN-16s network: a VGG16-style convolutional backbone
    plus scoring and learned-upsampling heads.

    :param cf: configuration object forwarded to the base-class constructor
    :param n_class: number of output segmentation classes
    :param pretrained: if True, load backbone weights via ``load_basic_weights``
    :param net_name: key identifying which pretrained weight set to load
    """
    super(FCN16, self).__init__(cf)
    # conv1 -- padding=100 is the classic FCN trick: it over-pads the input
    # so the network accepts arbitrary input sizes; the excess is cropped
    # later (presumably in forward -- TODO confirm against the forward pass)
    self.conv1_1 = nn.Conv2d(3, 64, 3, padding=100)
    self.relu1_1 = nn.ReLU(inplace=True)
    self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
    self.relu1_2 = nn.ReLU(inplace=True)
    self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/2
    # conv2
    self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
    self.relu2_1 = nn.ReLU(inplace=True)
    self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
    self.relu2_2 = nn.ReLU(inplace=True)
    self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/4
    # conv3
    self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
    self.relu3_1 = nn.ReLU(inplace=True)
    self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
    self.relu3_2 = nn.ReLU(inplace=True)
    self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
    self.relu3_3 = nn.ReLU(inplace=True)
    self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/8
    # conv4
    self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
    self.relu4_1 = nn.ReLU(inplace=True)
    self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
    self.relu4_2 = nn.ReLU(inplace=True)
    self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
    self.relu4_3 = nn.ReLU(inplace=True)
    self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/16
    # conv5
    self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
    self.relu5_1 = nn.ReLU(inplace=True)
    self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
    self.relu5_2 = nn.ReLU(inplace=True)
    self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
    self.relu5_3 = nn.ReLU(inplace=True)
    self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)  # 1/32
    # fc6 -- VGG's fully-connected layers recast as convolutions
    self.fc6 = nn.Conv2d(512, 4096, 7)
    self.relu6 = nn.ReLU(inplace=True)
    self.drop6 = nn.Dropout2d()
    # fc7
    self.fc7 = nn.Conv2d(4096, 4096, 1)
    self.relu7 = nn.ReLU(inplace=True)
    self.drop7 = nn.Dropout2d()
    # 1x1 scoring heads: one on fc7 output, one on the pool4 skip branch
    self.score_fr = nn.Conv2d(4096, n_class, 1)
    self.score_pool4 = nn.Conv2d(512, n_class, 1)
    # learned upsampling: 2x after score_fr, then 16x back to input scale
    self.upscore2 = nn.ConvTranspose2d(n_class, n_class, 4, stride=2,
                                       bias=False)
    self.upscore16 = nn.ConvTranspose2d(n_class, n_class, 32,
                                        stride=16, bias=False)
    self._initialize_weights()
    if pretrained:
        self.load_basic_weights(net_name)
def __init__(self, in_channels, n_filters, k_size, stride, padding, bias=True):
    """Single-layer transposed-convolution unit wrapped in a Sequential.

    Channel counts are coerced to int so float-valued sizes (e.g. results
    of division) are accepted.
    """
    super(deconv2D, self).__init__()
    transposed_conv = nn.ConvTranspose2d(int(in_channels),
                                         int(n_filters),
                                         kernel_size=k_size,
                                         padding=padding,
                                         stride=stride,
                                         bias=bias)
    self.dcb_unit = nn.Sequential(transposed_conv)
def __init__(self, input_nc, output_nc, ngf=32, norm_layer=nn.BatchNorm2d,
             use_dropout=False, n_blocks=3, padding_type='reflect'):
    """Build a conv/ResNet-block autoencoder.

    :param input_nc: number of input channels
    :param output_nc: number of output channels
    :param ngf: base number of feature maps
    :param norm_layer: normalization layer class (bias is only enabled for
        InstanceNorm, which has no affine bias of its own by default)
    :param use_dropout: passed through to the ResNet blocks
    :param n_blocks: number of ResNet blocks at the bottleneck
    :param padding_type: padding mode passed to the ResNet blocks

    Fixes vs. the original: a dead ``model = [nn.Conv2d(input_nc, ngf,
    kernel_size=1)]`` assignment (immediately overwritten) and an
    unreachable ``if i == 3`` branch inside a ``range(1)`` loop were
    removed; neither was ever registered or executed.
    """
    super(Autoencoder, self).__init__()
    use_bias = norm_layer == nn.InstanceNorm2d
    # stem: 7x7 conv (no padding), norm, ReLU
    model = [
        # nn.ReflectionPad2d(1),
        nn.Conv2d(input_nc, ngf, kernel_size=7, padding=0, bias=use_bias),
        norm_layer(ngf),
        nn.ReLU(True)
    ]
    n_downsampling = 4
    # Special case for 9th block of resnet
    # n_downsampling, n_blocks = 0, 0
    for i in range(n_downsampling):  # add downsampling layers
        mult = 2 ** i
        model += [
            nn.Conv2d(ngf * mult, ngf * mult * 2, kernel_size=3,
                      stride=2, padding=1, bias=use_bias),
            norm_layer(ngf * mult * 2),
            nn.ReLU(True)
        ]
    mult = 2 ** n_downsampling
    for i in range(n_blocks):  # add ResNet blocks at the bottleneck
        model += [
            ResnetBlock(ngf * mult, padding_type=padding_type,
                        norm_layer=norm_layer, use_dropout=use_dropout,
                        use_bias=use_bias)
        ]
    for i in range(n_downsampling):  # add upsampling layers
        mult = 2 ** (n_downsampling - i)
        model += [
            nn.ConvTranspose2d(ngf * mult, int(ngf * mult / 2),
                               kernel_size=3, stride=2, padding=1,
                               output_padding=1, bias=use_bias),
            norm_layer(int(ngf * mult / 2)),
            nn.ReLU(True)
        ]
    n_upsampling_extra = 1
    for i in range(n_upsampling_extra):  # one extra upsampling stage
        model += [
            nn.ConvTranspose2d(ngf, ngf, kernel_size=3, stride=2,
                               padding=1, output_padding=1, bias=use_bias),
            norm_layer(ngf),
            nn.ReLU(True)
        ]
    # output head: alternating reflection padding and shrinking convs
    model += [nn.ReflectionPad2d(3)]
    model += [nn.Conv2d(ngf, ngf // 2, kernel_size=7, padding=0)]
    model += [nn.ReflectionPad2d(3)]
    model += [nn.Conv2d(ngf // 2, ngf // 4, kernel_size=5, padding=0)]
    model += [nn.ReflectionPad2d(3)]
    model += [nn.Conv2d(ngf // 4, output_nc, kernel_size=5, padding=0)]
    model += [nn.ReflectionPad2d(3)]
    model += [nn.Conv2d(output_nc, output_nc, kernel_size=5, padding=0)]
    self.m = nn.Sequential(*model)
def __init__(self, input_nc, output_nc, n_residual_blocks=9):
    """Define the generator network.

    :param input_nc: number of input channels
    :param output_nc: number of output channels
    :param n_residual_blocks: number of residual blocks
    """
    super(Generator, self).__init__()
    # Initial convolution block.
    # ReflectionPad is used for padding, so the
    # input is 3*256*256 and the output is 64*256*256.
    model = [
        nn.ReflectionPad2d(3),
        nn.Conv2d(input_nc, 64, 7),
        nn.InstanceNorm2d(64),
        nn.ReLU(inplace=True)
    ]
    # Downsampling.
    # First pass: input 64*256*256, output 128*128*128.
    # Second pass: input 128*128*128, output 256*64*64.
    in_features = 64
    out_features = in_features * 2
    for _ in range(2):
        model += [
            nn.Conv2d(in_features, out_features, 3, stride=2, padding=1),
            nn.InstanceNorm2d(out_features),
            nn.ReLU(inplace=True)
        ]
        in_features = out_features
        out_features = in_features * 2
    # Residual blocks.
    # Input and output are both 256*64*64.
    for _ in range(n_residual_blocks):  # 9 residual blocks by default
        model += [ResidualBlock(in_features)]
    # Upsampling.
    # First pass: input 256*64*64, output 128*128*128.
    # Second pass: input 128*128*128, output 64*256*256.
    out_features = in_features // 2
    for _ in range(2):
        model += [
            nn.ConvTranspose2d(in_features, out_features, 3, stride=2,
                               padding=1, output_padding=1),
            nn.InstanceNorm2d(out_features),
            nn.ReLU(inplace=True)
        ]
        in_features = out_features
        out_features = in_features // 2
    # Final output layer.
    # Input is 64*256*256, output is 3*256*256.
    model += [
        nn.ReflectionPad2d(3),
        nn.Conv2d(64, output_nc, 7),
        nn.Tanh()
    ]
    self.model = nn.Sequential(*model)
def deconv_block(self, in_channels, out_channels):
    """Return a 2x upsampling unit: bias-free stride-2 transposed
    convolution followed by an in-place ReLU."""
    upsample = nn.ConvTranspose2d(in_channels, out_channels,
                                  kernel_size=2, stride=2, bias=False)
    activation = nn.ReLU(inplace=True)
    return nn.Sequential(upsample, activation)
def __init__(self, in_channels, out_channels, kernel_size, stride, out_padding):
    """Fractionally-strided convolution followed by affine instance norm.

    Padding is derived from the kernel size (kernel_size // 2) so odd
    kernels keep "same"-style alignment.
    """
    super(DeconvLayer, self).__init__()
    same_padding = kernel_size // 2
    self.frac_conv = nn.ConvTranspose2d(in_channels,
                                        out_channels,
                                        kernel_size,
                                        stride,
                                        same_padding,
                                        out_padding)
    self.instance_norm = nn.InstanceNorm2d(out_channels, affine=True)
def deconv(in_planes, out_planes, kernel_size=4, stride=2, padding=1):
    """Factory for a learnable upsampling layer.

    Defaults (4x4 kernel, stride 2, padding 1) double the spatial
    resolution exactly. Bias is always enabled.
    """
    layer = nn.ConvTranspose2d(in_planes,
                               out_planes,
                               kernel_size=kernel_size,
                               stride=stride,
                               padding=padding,
                               bias=True)
    return layer
def __init__(self, size, num_classes, use_refine=False):
    """RefineDet-style SSD head over a VGG backbone.

    :param size: input image size (the backbone config is keyed on '320')
    :param num_classes: number of detection classes
    :param use_refine: if True, also build the anchor refinement module
        (ARM) localization/confidence heads

    FIX: the original used C-style ``//`` comments ("//Extra layers...",
    "//P6 three 3*3*256"), which is a Python syntax error; they are now
    ``#`` comments.
    """
    super(RefineSSD, self).__init__()
    self.num_classes = num_classes
    # TODO: implement __call__ in PriorBox
    self.size = size
    self.use_refine = use_refine

    # SSD backbone network
    self.base = nn.ModuleList(vgg(vgg_base['320'], 3))
    # Layers that learn to scale the L2-normalized features from
    # conv4_3 and conv5_3
    self.L2Norm_4_3 = L2Norm(512, 10)
    self.L2Norm_5_3 = L2Norm(512, 8)
    # Extra layers Conv6_1, Conv6_2
    self.extras = nn.Sequential(
        nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1),
        nn.ReLU(inplace=True))
    # P6: three 3x3x256 convolutions
    self.last_layer_trans = nn.Sequential(
        nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1))
    if use_refine:
        # ARM heads: 4 source layers, 12 = 3 anchors * 4 box offsets
        self.arm_loc = nn.ModuleList([
            nn.Conv2d(512, 12, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 12, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(1024, 12, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 12, kernel_size=3, stride=1, padding=1),
        ])
        # ARM objectness: 6 = 3 anchors * 2 (object / background)
        self.arm_conf = nn.ModuleList([
            nn.Conv2d(512, 6, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 6, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(1024, 6, kernel_size=3, stride=1, padding=1),
            nn.Conv2d(512, 6, kernel_size=3, stride=1, padding=1),
        ])
    # ODM heads operate on the 256-channel fused pyramid features
    # (built unconditionally, matching the standard RefineDet layout)
    self.odm_loc = nn.ModuleList([
        nn.Conv2d(256, 12, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(256, 12, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(256, 12, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(256, 12, kernel_size=3, stride=1, padding=1),
    ])
    self.odm_conf = nn.ModuleList([
        nn.Conv2d(256, 3 * num_classes, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(256, 3 * num_classes, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(256, 3 * num_classes, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(256, 3 * num_classes, kernel_size=3, stride=1, padding=1),
    ])
    # lateral transforms feeding the top-down pathway
    self.trans_layers = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)),
        nn.Sequential(
            nn.Conv2d(512, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)),
        nn.Sequential(
            nn.Conv2d(1024, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)),
    ])
    # 2x learned upsampling used in the top-down pathway
    self.up_layers = nn.ModuleList([
        nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0),
        nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0),
        nn.ConvTranspose2d(256, 256, kernel_size=2, stride=2, padding=0),
    ])
    # (name kept as in the original to preserve the public attribute)
    self.latent_layrs = nn.ModuleList([
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1),
    ])
    self.softmax = nn.Softmax()
def __init__(self, in_class=1, n_class=1): super(FCN32s, self).__init__() # conv1 self.conv1_1 = nn.Conv2d(in_class, 64, 3, padding=100) self.relu1_1 = nn.ReLU(inplace=True) self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1) self.relu1_2 = nn.ReLU(inplace=True) self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True) # 1/2 # conv2 self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1) self.relu2_1 = nn.ReLU(inplace=True) self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1) self.relu2_2 = nn.ReLU(inplace=True) self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True) # 1/4 # conv3 self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1) self.relu3_1 = nn.ReLU(inplace=True) self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1) self.relu3_2 = nn.ReLU(inplace=True) self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1) self.relu3_3 = nn.ReLU(inplace=True) self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True) # 1/8 # conv4 self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1) self.relu4_1 = nn.ReLU(inplace=True) self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1) self.relu4_2 = nn.ReLU(inplace=True) self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1) self.relu4_3 = nn.ReLU(inplace=True) self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True) # 1/16 # conv5 self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1) self.relu5_1 = nn.ReLU(inplace=True) self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1) self.relu5_2 = nn.ReLU(inplace=True) self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1) self.relu5_3 = nn.ReLU(inplace=True) self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True) # 1/32 # fc6 self.fc6 = nn.Conv2d(512, 4096, 7) self.relu6 = nn.ReLU(inplace=True) self.drop6 = nn.Dropout2d() # fc7 self.fc7 = nn.Conv2d(4096, 4096, 1) self.relu7 = nn.ReLU(inplace=True) self.drop7 = nn.Dropout2d() self.score_fr = nn.Conv2d(4096, n_class, 1) self.upscore = nn.ConvTranspose2d(n_class, n_class, 64, stride=32, bias=False) self._initialize_weights()
def __init__(self, batch_norm=False, with_confidence=False, clamp=False,
             depth_activation=None):
    """Encoder/decoder depth-prediction network with multi-scale outputs.

    :param batch_norm: passed through to the conv/deconv helper factories
    :param with_confidence: passed to predict_depth; presumably adds a
        confidence output channel -- TODO confirm in predict_depth
    :param clamp: stored flag; usage is outside this block
    :param depth_activation: 'elu' selects shifted ELU (elu(x) + 1, which
        keeps outputs strictly positive); any other value is stored as-is
    """
    super(DepthNet, self).__init__()
    self.clamp = clamp
    if depth_activation == 'elu':
        # elu(x) + 1 maps outputs to (0, inf)
        self.depth_activation = lambda x: nn.functional.elu(x) + 1
    else:
        self.depth_activation = depth_activation
    # Encoder: 6 input channels (presumably a stacked image pair --
    # TODO confirm against the caller), downsampling by 2 at each conv*
    self.conv1 = conv(6, 32, stride=2, batch_norm=batch_norm)
    self.conv2 = conv(32, 64, stride=2, batch_norm=batch_norm)
    self.conv3 = conv(64, 128, stride=2, batch_norm=batch_norm)
    self.conv3_1 = conv(128, 128, batch_norm=batch_norm)
    self.conv4 = conv(128, 256, stride=2, batch_norm=batch_norm)
    self.conv4_1 = conv(256, 256, batch_norm=batch_norm)
    self.conv5 = conv(256, 256, stride=2, batch_norm=batch_norm)
    self.conv5_1 = conv(256, 256, batch_norm=batch_norm)
    self.conv6 = conv(256, 512, stride=2, batch_norm=batch_norm)
    self.conv6_1 = conv(512, 512, batch_norm=batch_norm)
    # Decoder: input widths 513/385/193 look like skip features plus the
    # upsampled 1-channel depth map concatenated in (e.g. 256+256+1 = 513)
    # -- NOTE(review): inferred from the arithmetic; confirm in forward
    self.deconv5 = deconv(512, 256, batch_norm=batch_norm)
    self.deconv4 = deconv(513, 128, batch_norm=batch_norm)
    self.deconv3 = deconv(385, 64, batch_norm=batch_norm)
    self.deconv2 = deconv(193, 32, batch_norm=batch_norm)
    # Per-scale depth prediction heads
    self.predict_depth6 = predict_depth(512, with_confidence)
    self.predict_depth5 = predict_depth(513, with_confidence)
    self.predict_depth4 = predict_depth(385, with_confidence)
    self.predict_depth3 = predict_depth(193, with_confidence)
    self.predict_depth2 = predict_depth(97, with_confidence)
    # Learned 2x upsampling of the 1-channel depth map between scales
    self.upsampled_depth6_to_5 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False)
    self.upsampled_depth5_to_4 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False)
    self.upsampled_depth4_to_3 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False)
    self.upsampled_depth3_to_2 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False)
    init_modules(self)
def __init__(self, input_nc, output_nc, ngf=32, n_downsample_global=3,
             n_blocks_global=9, n_local_enhancers=1, n_blocks_local=3,
             norm_layer=nn.BatchNorm2d, padding_type='reflect'):
    """pix2pixHD-style local enhancer: a global generator trunk plus one
    or more local enhancement branches at increasing resolution.

    :param input_nc: input channel count
    :param output_nc: output channel count
    :param ngf: base feature width of the finest local enhancer
    :param n_downsample_global: downsampling steps in the global generator
    :param n_blocks_global: ResNet blocks in the global generator
    :param n_local_enhancers: number of local enhancer branches
    :param n_blocks_local: ResNet blocks per local enhancer
    :param norm_layer: normalization layer class
    :param padding_type: padding mode for the ResNet blocks
    """
    super(LocalEnhancer, self).__init__()
    self.n_local_enhancers = n_local_enhancers

    ###### global generator model #####
    # the global trunk runs at a coarser scale, so its width is scaled up
    ngf_global = ngf * (2**n_local_enhancers)
    model_global = GlobalGenerator(input_nc, output_nc, ngf_global,
                                   n_downsample_global, n_blocks_global,
                                   norm_layer).model
    model_global = [model_global[i] for i in range(len(model_global) - 3)
                    ]  # get rid of final convolution layers
    self.model = nn.Sequential(*model_global)

    ###### local enhancer layers #####
    for n in range(1, n_local_enhancers + 1):
        ### downsample
        ngf_global = ngf * (2**(n_local_enhancers - n))
        model_downsample = [
            nn.ReflectionPad2d(3),
            nn.Conv2d(input_nc, ngf_global, kernel_size=7, padding=0),
            norm_layer(ngf_global),
            nn.ReLU(True),
            nn.Conv2d(ngf_global, ngf_global * 2, kernel_size=3, stride=2,
                      padding=1),
            norm_layer(ngf_global * 2),
            nn.ReLU(True)
        ]
        ### residual blocks
        model_upsample = []
        for i in range(n_blocks_local):
            model_upsample += [
                ResnetBlock(ngf_global * 2, padding_type=padding_type,
                            norm_layer=norm_layer)
            ]
        ### upsample
        model_upsample += [
            nn.ConvTranspose2d(ngf_global * 2, ngf_global, kernel_size=3,
                               stride=2, padding=1, output_padding=1),
            norm_layer(ngf_global),
            nn.ReLU(True)
        ]
        ### final convolution -- only the last (finest) enhancer maps
        ### back to output_nc with a Tanh
        if n == n_local_enhancers:
            model_upsample += [
                nn.ReflectionPad2d(3),
                nn.Conv2d(ngf, output_nc, kernel_size=7, padding=0),
                nn.Tanh()
            ]
        # register branches as model<n>_1 / model<n>_2 so forward can
        # look them up by name
        setattr(self, 'model' + str(n) + '_1',
                nn.Sequential(*model_downsample))
        setattr(self, 'model' + str(n) + '_2',
                nn.Sequential(*model_upsample))
    # used to build the coarser input for the global trunk
    self.downsample = nn.AvgPool2d(3, stride=2, padding=[1, 1],
                                   count_include_pad=False)
def __init__(self, Data = 'MNIST', batch_size = 100, z_dim=128):
    """Convolutional VAE with dataset-specific encoder/decoder stacks.

    :param Data: 'MNIST' or 'CIFAR'; anything else raises ValueError
    :param batch_size: stored in self.dim for later reshaping
    :param z_dim: latent dimensionality

    NOTE(review): ndf/ngf and flatten()/unFlatten() are module-level names
    defined outside this block; layer shape comments below assume the
    values they are configured with -- confirm against their definitions.
    """
    #Data = 'MNIST' or 'CIFAR'
    super(CNN_VAE, self).__init__()
    if Data == 'MNIST':
        self.dim = {'batch_size': batch_size, 'hight':28, 'pixels':784,
                    'channels':1}
    elif Data == 'CIFAR':
        self.dim = {'batch_size': batch_size, 'hight':32, 'pixels':1024,
                    'channels':3}
    else:
        raise ValueError("Data set not support")
    self.z_dim = z_dim
    if Data == 'MNIST':
        self.encoder = nn.Sequential(
            # input is (nc) x 28 x 28
            nn.Conv2d(1, ndf, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf) x 14 x 14
            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 7 x 7
            nn.Conv2d(ndf * 2, ndf * 4, 3, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 4 x 4
            flatten(),
            nn.Linear(4096,2048),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(2048,1024),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(1024,400),
        )
    elif Data == 'CIFAR':
        self.encoder = nn.Sequential(
            # input is (nc) x 32 x 32
            nn.Conv2d(3, ndf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*2) x 16 x 16
            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*4) x 8 x 8
            nn.Conv2d(ndf * 4, ndf, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ndf),
            nn.LeakyReLU(0.2, inplace=True),
            # state size. (ndf*8) x 4 x 4
            flatten(),
            nn.Linear(1024,512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512,400),
        )
    # Gaussian parameters of q(z|x)
    self.fc_mu = nn.Linear(400, self.z_dim)
    self.fc_logvar = nn.Linear(400, self.z_dim)
    if Data == 'MNIST':
        # NOTE(review): this decoder ends in 3 output channels at 32x32,
        # but MNIST is declared above as 1 channel / 28x28 -- confirm
        # whether the training pipeline resizes/replicates channels.
        self.decoder = nn.Sequential(
            unFlatten(),
            # input is Z, going into a convolution
            nn.ConvTranspose2d( z_dim, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d( ngf * 2, 3, 4, 2, 1, bias=False),
            nn.Sigmoid()
        )
    elif Data == 'CIFAR':
        self.decoder = nn.Sequential(
            unFlatten(),
            # input is Z, going into a convolution
            nn.ConvTranspose2d( z_dim, ngf * 8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(ngf * 8),
            nn.ReLU(True),
            # state size. (ngf*8) x 4 x 4
            nn.ConvTranspose2d(ngf * 8, ngf * 4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 4),
            nn.ReLU(True),
            # state size. (ngf*4) x 8 x 8
            nn.ConvTranspose2d( ngf * 4, ngf * 2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(ngf * 2),
            nn.ReLU(True),
            # state size. (ngf*2) x 16 x 16
            nn.ConvTranspose2d( ngf * 2, 3, 4, 2, 1, bias=False),
            nn.Sigmoid()
        )
def __init__(self, num_classes=22, backbone='efficientnet-b1',
             seperate_flow_head=False, pred_flow_pyramid=True,
             pred_flow_pyramid_add=True, ced_real=1, ced_render=1,
             ced_render_d=1, ced_real_d=1):  # tested with b6
    """Build an EfficientNet-encoder / deconv-decoder network that
    predicts per-pixel class labels and (optionally) a flow pyramid.

    :param num_classes: number of output label classes
    :param backbone: EfficientNet variant name
    :param seperate_flow_head: stored flag (spelling kept: public attr)
    :param pred_flow_pyramid: build per-scale flow prediction heads
    :param pred_flow_pyramid_add: concatenate the upsampled flow (2 ch)
        into each decoder stage's input
    :param ced_real/ced_render/ced_render_d/ced_real_d: how many decoder
        stages receive skip features from the real/render RGB and depth
        encoders respectively (0 disables that encoder stream)
    """
    super().__init__()
    self.feature_extractor = EfficientNet.from_pretrained(backbone)
    self.size = self.feature_extractor.get_image_size(backbone)
    self.seperate_flow_head = seperate_flow_head
    self.ced_real = ced_real
    self.ced_render = ced_render
    self.ced_real_d = ced_real_d
    self.ced_render_d = ced_render_d
    self.pred_flow_pyramid_add = pred_flow_pyramid_add
    self.pred_flow_pyramid = pred_flow_pyramid
    # probe the backbone once with a dummy batch to learn per-layer
    # channel counts and resolutions
    idxs, feats, res = self.feature_extractor.layer_info(
        torch.ones((4, 3, self.size, self.size)))
    # a separate 1-channel backbone is only needed if any depth stream
    # feeds the decoder
    if ced_render_d > 0 or ced_real_d > 0:
        self.depth_backbone = True
    else:
        self.depth_backbone = False
    if self.depth_backbone:
        self.feature_extractor_depth = EfficientNet.from_name(
            backbone, in_channels=1)
    # collect the last layer index (and its channel count) before each
    # resolution change -- these are the feature-extraction points
    r = res[0]
    self.idx_extract = []
    self.feature_sizes = []
    for i in range(len(idxs)):
        if r != res[i]:
            self.idx_extract.append(i - 1)
            r = res[i]
            self.feature_sizes.append(feats[i - 1])
    self.idx_extract.append(len(idxs) - 1)
    self.feature_sizes.append(feats[len(idxs) - 1])
    self._num_classes = num_classes
    dc = []
    pred_flow_pyramid = []
    upsample_flow_layers = []
    # 8 is the output width of the final decoder stage
    self.feature_sizes = [8] + self.feature_sizes
    label_feat = [16, 8, num_classes]
    label_layers = []
    label_i = -1
    for i in range(1, len(self.feature_sizes)):
        if i == 1:
            # deepest stage: input is only the concatenated encoder
            # features from whichever streams are enabled
            inc_feat_0 = (int(ced_real > 0) + int(ced_render > 0) +
                          int(ced_render_d > 0) +
                          int(ced_real_d > 0)) * self.feature_sizes[-i]
        else:
            # later stages: enabled skip streams plus the previous
            # decoder output (the +1)
            inc_feat_0 = (int(ced_real >= i) + int(ced_render >= i) +
                          int(ced_render_d >= i) + int(ced_real_d >= i) +
                          1) * self.feature_sizes[-i]
        if self.pred_flow_pyramid_add and self.pred_flow_pyramid:
            # 2 extra channels for the upsampled flow estimate
            inc_feat_0 += 2
        out_feat = self.feature_sizes[
            -(i + 1)]  #leave this number for now on constant
        dc.append(deconv(inc_feat_0, out_feat))
        print('Network inp:', inc_feat_0, ' out: ', out_feat)
        # label head branches off the last len(label_feat) stages
        if i > len(self.feature_sizes) - len(label_feat):
            if label_i == -1:
                inc_feat_label = inc_feat_0
            else:
                inc_feat_label = label_feat[label_i]
            label_i += 1
            out_feat_label = label_feat[label_i]
            label_layers.append(
                deconv(inc_feat_label, out_feat_label, bias=True))
        if self.pred_flow_pyramid:
            pred_flow_pyramid.append(predict_flow(inc_feat_0))
            upsample_flow_layers.append(
                nn.ConvTranspose2d(2, 2, 4, 2, 1, bias=False))
    label_layers.append(deconv(label_feat[-2], label_feat[-1], bias=True))
    self.label_layers = nn.ModuleList(label_layers)
    self.deconvs = nn.ModuleList(dc)
    pred_flow_pyramid.append(predict_flow(self.feature_sizes[0]))
    if self.pred_flow_pyramid:
        # note: self.pred_flow_pyramid switches type here from bool to
        # nn.ModuleList (original behavior, preserved)
        self.pred_flow_pyramid = nn.ModuleList(pred_flow_pyramid)
        self.upsample_flow_layers = nn.ModuleList(upsample_flow_layers)
    # input/output resamplers and normalization transforms
    self.up_in = torch.nn.UpsamplingBilinear2d(size=(self.size, self.size))
    self.input_trafos = transforms.Compose([
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    self.norm_depth = transforms.Normalize([0.485, 0.485], [0.229, 0.229])
    self.up_out = torch.nn.UpsamplingNearest2d(size=(480, 640))
    self.up_out_bl = torch.nn.UpsamplingBilinear2d(size=(480, 640))
    self.up_nn_in = torch.nn.UpsamplingNearest2d(size=(self.size, self.size))
def __init__(self,
             input_size=(3, 64, 64),
             kernel_sizes=None,
             hidden_size=256,
             dim_z=32,
             binary=True,
             **kwargs):
    """initialize neural networks

    :param input_size: C x H x W
    :param kernel_sizes: number of channels in the kernels (defaults to
        [32, 32, 64, 64]; None avoids a shared mutable default argument)
    :param hidden_size: size of hidden layer
    :param dim_z: dimension of latent variable z
    :param binary: whether the data is binary

    FIX vs. the original: in the non-binary branch the logvar decoder was
    built and then immediately discarded -- ``nn.Sequential`` wrapped
    ``self.conv_decoder`` (the mean decoder, sharing its parameters)
    instead of ``self.conv_decoder_logvar``.
    """
    super(ConvVAE, self).__init__()
    if kernel_sizes is None:
        kernel_sizes = [32, 32, 64, 64]
    self.input_size = input_size
    self.channel_sizes = [input_size[0]] + kernel_sizes
    self.hidden_size = hidden_size
    self.dim_z = dim_z
    self.binary = binary
    # initialize encoder layers
    self.conv_encoder = nns.create_cnn2d(input_size[0], kernel_sizes,
                                         (4, 4), 2, 1)
    # encoder conv layer output size: each stride-2 conv halves H/W
    conv_out_size = int(input_size[-1] / (2**len(kernel_sizes)))
    self.conv_output_size = (self.channel_sizes[-1], conv_out_size,
                             conv_out_size)
    self.flat_conv_output_size = np.prod(self.conv_output_size)
    # layers transfer features to hidden units
    self.features_to_hidden = nns.create_mlp(self.flat_conv_output_size,
                                             [hidden_size])
    # Gaussian MLP for q(z|x)
    self.fc_mean = nn.Linear(hidden_size, dim_z)
    self.fc_logvar = nn.Linear(hidden_size, dim_z)
    # layers transform latent variables to features (via hidden units)
    self.latent_to_features = nns.create_mlp(
        dim_z, [hidden_size, self.flat_conv_output_size])
    # initialize decoder layers (mirror of the encoder)
    self.conv_decoder = nns.create_transpose_cnn2d(
        self.channel_sizes[-1], self.channel_sizes[-2:0:-1], (4, 4), 2, 1,
        0)
    self.conv_decoder = nn.Sequential(
        self.conv_decoder,
        nn.ConvTranspose2d(self.channel_sizes[1],
                           self.channel_sizes[0], (4, 4),
                           stride=2,
                           padding=1))
    # the final layer
    if binary:
        # for binary data use sigmoid activation function
        self.conv_decoder = nn.Sequential(self.conv_decoder, nn.Sigmoid())
    else:
        # for non-binary data use extra Gaussian MLP (a separate logvar
        # transposed-cnn with its own parameters)
        self.conv_decoder_logvar = nns.create_transpose_cnn2d(
            self.channel_sizes[-1], self.channel_sizes[-2:0:-1], (4, 4), 2,
            1, 0)
        self.conv_decoder_logvar = nn.Sequential(
            self.conv_decoder_logvar,
            nn.ConvTranspose2d(self.channel_sizes[1],
                               self.channel_sizes[0], (4, 4),
                               stride=2,
                               padding=1))
def __init__(self, in_channels, out_channels=None, dilation=1,
             downsample=False, proj_ratio=4, upsample=False,
             asymmetric=False, regularize=True, p_drop=None,
             use_prelu=True):
    """ENet-style bottleneck block: 1x1 projection, main conv (regular /
    dilated / asymmetric 1x5+5x1 / transposed), 1x1 expansion, with an
    optional pooling/unpooling main branch.

    :param in_channels: input channel count
    :param out_channels: output channel count; defaults to in_channels.
        When larger, self.pad records the channel deficit (presumably
        zero-padded in forward -- TODO confirm)
    :param dilation: dilation for the regular main conv
    :param downsample: use a stride-2 conv + max-pool main branch
    :param proj_ratio: bottleneck reduction factor for the inner channels
    :param upsample: use a transposed conv + max-unpool main branch
    :param asymmetric: use the factorized 1x5 / 5x1 main conv
    :param regularize: add spatial dropout (requires p_drop)
    :param p_drop: dropout probability for the regularizer
    :param use_prelu: PReLU activations if True, else in-place ReLU
    """
    super(BottleNeck, self).__init__()
    self.pad = 0
    self.upsample = upsample
    self.downsample = downsample
    if not out_channels:
        out_channels = in_channels
    else:
        # channel deficit to be padded on the identity branch
        self.pad = out_channels - in_channels
    if regularize:
        assert p_drop is not None
    # downsample and upsample are mutually exclusive
    if downsample:
        assert not upsample
    if upsample:
        assert not downsample
    inter_channels = in_channels // proj_ratio

    # Main branch
    if upsample:
        # NOTE(review): 'spatil_conv' is a typo for 'spatial_conv', but
        # the attribute name is referenced by forward elsewhere, so it
        # must not be renamed here.
        self.spatil_conv = nn.Conv2d(in_channels, out_channels, 1,
                                     bias=False)
        self.bn_up = nn.BatchNorm2d(out_channels)
        self.unpool = nn.MaxUnpool2d(kernel_size=2, stride=2)
    elif downsample:
        # the indices are used for unpooling
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2,
                                 return_indices=True)

    # Bottleneck
    # first convolution layer, reduce the dimensionality
    if downsample:
        # downsample with stride=2
        self.conv1 = nn.Conv2d(in_channels, inter_channels, 2, stride=2,
                               bias=False)
    else:
        # 1x1 conv
        self.conv1 = nn.Conv2d(in_channels, inter_channels, 1, bias=False)
    self.bn1 = nn.BatchNorm2d(inter_channels)
    self.prelu1 = nn.PReLU() if use_prelu else nn.ReLU(inplace=True)

    # second convolution layer (main conv)
    if asymmetric:
        # first 1x5 kernel size, then 5x1 kernel size
        self.conv2 = nn.Sequential(
            nn.Conv2d(inter_channels, inter_channels, kernel_size=(1, 5),
                      padding=(0, 2)), nn.BatchNorm2d(inter_channels),
            nn.PReLU(),
            nn.Conv2d(inter_channels, inter_channels, kernel_size=(5, 1),
                      padding=(2, 0)))
    elif upsample:
        # upsample
        self.conv2 = nn.ConvTranspose2d(inter_channels, inter_channels,
                                        kernel_size=3, padding=1,
                                        output_padding=1, stride=2,
                                        bias=False)
    else:
        self.conv2 = nn.Conv2d(inter_channels, inter_channels, 3,
                               padding=dilation, dilation=dilation,
                               bias=False)
    self.bn2 = nn.BatchNorm2d(inter_channels)
    self.prelu2 = nn.PReLU() if use_prelu else nn.ReLU(inplace=True)

    # third convolution layer, increase dimensionality to out_channels
    # 1x1 conv
    self.conv3 = nn.Conv2d(inter_channels, out_channels, 1, bias=False)
    self.bn3 = nn.BatchNorm2d(out_channels)
    self.prelu3 = nn.PReLU() if use_prelu else nn.ReLU(inplace=True)

    # optional spatial dropout, and the post-merge activation
    self.regularizer = nn.Dropout2d(p_drop) if regularize else None
    self.prelu_out = nn.PReLU() if use_prelu else nn.ReLU(inplace=True)
def __init__(
    self,
    in_channels: int,
    latent_dim: int,
    hidden_dims: List = None,
    alpha: float = 100.0,
    beta: float = 10.0,
    lr: float = 0.005,
    weight_decay: Optional[float] = 0,
    scheduler_gamma: Optional[float] = 0.97,
) -> None:
    """Log-cosh VAE: conv encoder, Gaussian latent, transposed-conv decoder.

    :param in_channels: channels of the input image
    :param latent_dim: latent dimensionality
    :param hidden_dims: encoder channel widths (default [32,64,128,256,512])
    :param alpha: log-cosh steepness parameter
    :param beta: log-cosh weighting parameter
    :param lr: learning rate, forwarded to the base class
    :param weight_decay: optimizer weight decay, forwarded to the base class
    :param scheduler_gamma: LR-scheduler decay, forwarded to the base class

    FIX vs. the original: ``hidden_dims.reverse()`` mutated the caller's
    list in place; we now work on a copy.
    """
    super(LogCoshVAE, self).__init__(
        lr=lr, weight_decay=weight_decay, scheduler_gamma=scheduler_gamma
    )

    self.latent_dim = latent_dim
    self.alpha = alpha
    self.beta = beta

    if hidden_dims is None:
        hidden_dims = [32, 64, 128, 256, 512]
    else:
        # copy so the in-place reverse below cannot mutate the caller's list
        hidden_dims = list(hidden_dims)

    # Build Encoder: stride-2 conv -> BN -> LeakyReLU per stage
    modules = []
    for h_dim in hidden_dims:
        modules.append(
            nn.Sequential(
                nn.Conv2d(
                    in_channels,
                    out_channels=h_dim,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                ),
                nn.BatchNorm2d(h_dim),
                nn.LeakyReLU(),
            )
        )
        in_channels = h_dim
    self.encoder = nn.Sequential(*modules)
    # *4 assumes a 2x2 spatial map at the encoder output
    self.fc_mu = nn.Linear(hidden_dims[-1] * 4, latent_dim)
    self.fc_var = nn.Linear(hidden_dims[-1] * 4, latent_dim)

    # Build Decoder (mirror of the encoder)
    modules = []
    self.decoder_input = nn.Linear(latent_dim, hidden_dims[-1] * 4)
    hidden_dims.reverse()
    for i in range(len(hidden_dims) - 1):
        modules.append(
            nn.Sequential(
                nn.ConvTranspose2d(
                    hidden_dims[i],
                    hidden_dims[i + 1],
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    output_padding=1,
                ),
                nn.BatchNorm2d(hidden_dims[i + 1]),
                nn.LeakyReLU(),
            )
        )
    self.decoder = nn.Sequential(*modules)
    # final upsample plus a 3-channel Tanh output head
    self.final_layer = nn.Sequential(
        nn.ConvTranspose2d(
            hidden_dims[-1],
            hidden_dims[-1],
            kernel_size=3,
            stride=2,
            padding=1,
            output_padding=1,
        ),
        nn.BatchNorm2d(hidden_dims[-1]),
        nn.LeakyReLU(),
        nn.Conv2d(hidden_dims[-1], out_channels=3, kernel_size=3, padding=1),
        nn.Tanh(),
    )
def __init__(self, in_ch, out_ch):
    """U-Net-style upsampling stage: a 2x transposed convolution whose
    output is then refined by the shared double-conv unit."""
    super(up, self).__init__()
    # refine the (concatenated) features after upsampling
    self.conv = union_conv(in_ch, out_ch)
    # learned 2x spatial upsampling
    self.up = nn.ConvTranspose2d(in_ch, out_ch, kernel_size=2, stride=2)
def __init__(self):
    """Minimal autoencoder over non-overlapping 8x8 patches: one conv
    encoder and its exact transposed-conv mirror."""
    super(Naive_ae_deconv, self).__init__()
    # 3-channel image -> 16 feature maps, one per 8x8 patch
    self.encoder = nn.Conv2d(3, 16, kernel_size=8, stride=8)
    # mirror: 16 feature maps -> 3-channel reconstruction
    self.decoder = nn.ConvTranspose2d(16, 3, kernel_size=8, stride=8)
def __init__(self,
             num_convs=4,
             roi_feat_size=14,
             in_channels=256,
             conv_kernel_size=3,
             conv_out_channels=256,
             upsample_method='deconv',
             upsample_ratio=2,
             num_classes=81,
             class_agnostic=False,
             conv_cfg=None,
             norm_cfg=None,
             fc_conv=True,
             training=True,
             loss_mask=dict(type='CrossEntropyLoss', use_mask=True,
                            loss_weight=1.0)):
    """Mask head with a conv tower, a parallel FC branch, an optional
    upsampling layer, and a per-class logits conv.

    :param num_convs: conv tower depth (num_convs - 1 ConvModules plus
        convs_last)
    :param roi_feat_size: RoI feature map size (reserved, see WARN below)
    :param in_channels: input channel count
    :param conv_kernel_size: kernel size of the tower convs
    :param conv_out_channels: channel width of the tower
    :param upsample_method: None | 'deconv' | 'nearest' | 'bilinear'
    :param upsample_ratio: scale factor / deconv stride
    :param num_classes: number of mask classes
    :param class_agnostic: single shared mask channel if True
    :param conv_cfg, norm_cfg: forwarded to ConvModule
    :param fc_conv: stored flag enabling the FC branch path
    :param training: stored flag
    :param loss_mask: loss config passed to build_loss
        (NOTE(review): mutable dict default -- safe only as long as
        build_loss does not mutate it; confirm)
    :raises ValueError: on an unknown upsample_method
    """
    super(TCMaskHead, self).__init__()
    if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']:
        raise ValueError(
            'Invalid upsample method {}, accepted methods '
            'are "deconv", "nearest", "bilinear"'.format(upsample_method))
    self.num_convs = num_convs
    # WARN: roi_feat_size is reserved and not used
    self.roi_feat_size = _pair(roi_feat_size)
    self.in_channels = in_channels
    self.conv_kernel_size = conv_kernel_size
    self.conv_out_channels = conv_out_channels
    self.upsample_method = upsample_method
    self.upsample_ratio = upsample_ratio
    self.num_classes = num_classes
    self.class_agnostic = class_agnostic
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.fp16_enabled = False
    self.loss_mask = build_loss(loss_mask)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.training = training
    # NOTE(review): self.relu is assigned again, identically, at the end
    # of this method -- redundant but harmless.
    self.relu = nn.ReLU(inplace=True)
    self.fc_conv = fc_conv
    self.convs = nn.ModuleList()
    #self.fc_convs = nn.ModuleList()
    #self.fc_list = nn.ModuleList()
    # conv tower: num_convs - 1 ConvModules (the last tower conv is the
    # plain convs_last below)
    for i in range(self.num_convs - 1):
        in_channels = (self.in_channels
                       if i == 0 else self.conv_out_channels)
        padding = (self.conv_kernel_size - 1) // 2
        self.convs.append(
            ConvModule(in_channels,
                       self.conv_out_channels,
                       self.conv_kernel_size,
                       padding=padding,
                       conv_cfg=conv_cfg,
                       norm_cfg=norm_cfg))
    padding = (self.conv_kernel_size - 1) // 2
    self.fc_conv_out_channels = (self.conv_out_channels) // 2
    self.convs_last = nn.Conv2d(self.conv_out_channels,
                                self.conv_out_channels,
                                self.conv_kernel_size,
                                1,
                                padding=padding,
                                dilation=1,
                                groups=1,
                                bias=False)
    # FC branch: two convs, the second halving the channel count
    self.fc_convs_1 = nn.Conv2d(self.conv_out_channels,
                                self.conv_out_channels,
                                self.conv_kernel_size,
                                1,
                                padding=padding,
                                dilation=1,
                                groups=1,
                                bias=False)
    self.fc_convs_2 = nn.Conv2d(self.conv_out_channels,
                                self.fc_conv_out_channels,
                                self.conv_kernel_size,
                                1,
                                padding=padding,
                                dilation=1,
                                groups=1,
                                bias=False)
    fc_in_channels = self.fc_conv_out_channels * self.roi_feat_area
    # *4 matches the 2x upsampled (2*H x 2*W) mask resolution
    linear_channel = self.roi_feat_area * 4
    self.mask_fc = nn.Linear(fc_in_channels, linear_channel)  #fc
    upsample_in_channels = (self.conv_out_channels
                            if self.num_convs > 0 else in_channels)
    if self.upsample_method is None:
        self.upsample = None
    elif self.upsample_method == 'deconv':
        self.upsample = nn.ConvTranspose2d(upsample_in_channels,
                                           self.conv_out_channels,
                                           self.upsample_ratio,
                                           stride=self.upsample_ratio)
    else:
        self.upsample = nn.Upsample(scale_factor=self.upsample_ratio,
                                    mode=self.upsample_method)
    out_channels = 1 if self.class_agnostic else self.num_classes
    logits_in_channel = (self.conv_out_channels
                         if self.upsample_method == 'deconv' else
                         upsample_in_channels)
    self.conv_logits = nn.Conv2d(logits_in_channel, out_channels, 1)
    self.relu = nn.ReLU(inplace=True)
    self.debug_imgs = None
def __init__(self):
    """Autoencoder over overlapping 8x8 patches (stride 4), with a
    reflection pad to compensate for the stride/kernel mismatch."""
    super(Naive_ae_overlap, self).__init__()
    # 8x8 windows sliding by 4 pixels -> overlapping coverage
    self.encoder = nn.Conv2d(3, 16, kernel_size=8, stride=4)
    # transposed mirror of the encoder
    self.decoder = nn.ConvTranspose2d(16, 3, kernel_size=8, stride=4)
    # 4-pixel reflection padding on every border
    self.pad = nn.ReflectionPad2d(4)
def __init__(self, in_channels=3, n_mod=3, n_feature=16, n_classes=11):
    """Multi-modality FCN: VGG-style conv blocks applied per modality,
    1x1-ish fusion convs over the concatenated modality features, and
    learned upsampling back to input resolution.

    :param in_channels: channels per input modality
    :param n_mod: number of modalities whose block outputs are
        concatenated (hence the ``* self.n_mod`` input widths below)
    :param n_feature: width of each fused feature map
    :param n_classes: number of output classes
    """
    super(fcn_mul, self).__init__()
    self.in_channels = in_channels
    self.n_mod = n_mod
    self.n_feature = n_feature
    self.n_classes = n_classes
    # VGG16-style convolutional blocks (shared across modalities --
    # presumably applied once per modality in forward; TODO confirm)
    self.conv_block1 = nn.Sequential(
        nn.Conv2d(self.in_channels, 64, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(64, 64, 3, padding=1),
        nn.ReLU(inplace=True),
    )
    self.conv_block2 = nn.Sequential(
        nn.Conv2d(64, 128, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(128, 128, 3, padding=1),
        nn.ReLU(inplace=True),
    )
    self.conv_block3 = nn.Sequential(
        nn.Conv2d(128, 256, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(256, 256, 3, padding=1),
        nn.ReLU(inplace=True),
    )
    self.conv_block4 = nn.Sequential(
        nn.Conv2d(256, 512, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, 3, padding=1),
        nn.ReLU(inplace=True),
    )
    self.conv_block5 = nn.Sequential(
        nn.Conv2d(512, 512, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, 3, padding=1),
        nn.ReLU(inplace=True),
        nn.Conv2d(512, 512, 3, padding=1),
        nn.ReLU(inplace=True),
    )
    self.pool = nn.MaxPool2d(2, stride=2, ceil_mode=True)
    # fusion convs over concatenated per-modality features at each scale
    self.conv1_c = nn.Conv2d(64 * self.n_mod, self.n_feature, 3, padding=1)
    self.conv2_c = nn.Conv2d(128 * self.n_mod, self.n_feature, 3,
                             padding=1)
    self.conv3_c = nn.Conv2d(256 * self.n_mod, self.n_feature, 3,
                             padding=1)
    self.conv4_c = nn.Conv2d(512 * self.n_mod, self.n_feature, 3,
                             padding=1)
    self.conv5_c = nn.Conv2d(512 * self.n_mod, self.n_feature, 3,
                             padding=1)
    # learned upsampling back to the scale-1 resolution (2x, 4x, 8x, 16x)
    self.deconv2 = nn.ConvTranspose2d(self.n_feature, self.n_feature,
                                      kernel_size=2, stride=2)
    self.deconv3 = nn.ConvTranspose2d(self.n_feature, self.n_feature,
                                      kernel_size=4, stride=4)
    self.deconv4 = nn.ConvTranspose2d(self.n_feature, self.n_feature,
                                      kernel_size=8, stride=8)
    self.deconv5 = nn.ConvTranspose2d(self.n_feature, self.n_feature,
                                      kernel_size=16, stride=16)
    #self.dilation=[1,3,5,8,16]
    #self.atrous1=nn.Sequential(nn.Conv2d(4*16,4*16,kernel_size=3,dilation=self.dilation[0],padding=self.dilation[0]),)
    #self.atrous2=nn.Sequential(nn.Conv2d(4*16,4*16,kernel_size=3,dilation=self.dilation[1],padding=self.dilation[1]),)
    #self.atrous3=nn.Sequential(nn.Conv2d(4*16,4*16,kernel_size=3,dilation=self.dilation[2],padding=self.dilation[2]),)
    #self.atrous4=nn.Sequential(nn.Conv2d(4*16,4*16,kernel_size=3,dilation=self.dilation[3],padding=self.dilation[3]),)
    #self.atrous5=nn.Sequential(nn.Conv2d(4*16,4*16,kernel_size=3,dilation=self.dilation[4],padding=self.dilation[4]),)
    # classification over 4 fused scales concatenated together
    self.score = nn.Sequential(
        nn.Conv2d(4 * self.n_feature, self.n_classes, 1),
        #nn.Dropout(0.5),
    )
def __init__(self, classes, device):
    """Build a strided-conv encoder (model_cls) and a transposed-conv
    decoder (model_decoder) that mirrors it back to a 3-channel image.

    Spatial sizes in the comments assume a 64x64 input.
    """
    super().__init__()
    self.classes = classes
    self.device = device

    encoder = [
        # 64 -> 32
        nn.Conv2d(in_channels=3, out_channels=128, kernel_size=4, stride=2, padding=1),
        nn.LeakyReLU(negative_slope=0.2),
        # 32 -> 16
        nn.Conv2d(in_channels=128, out_channels=256, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(256),
        nn.LeakyReLU(negative_slope=0.2),
        # 16 -> 8
        nn.Conv2d(in_channels=256, out_channels=512, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(512),
        nn.LeakyReLU(negative_slope=0.2),
        # 8 -> 8 (stride-1 refinement)
        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.LeakyReLU(negative_slope=0.2),
        # 8 -> 4
        nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(1024),
        nn.LeakyReLU(negative_slope=0.2),
    ]
    self.model_cls = nn.Sequential(*encoder)

    decoder = [
        # 4 -> 4 (stride-1 refinement)
        nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.LeakyReLU(negative_slope=0.2),
        # 4 -> 8
        nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(256),
        nn.LeakyReLU(negative_slope=0.2),
        # 8 -> 16
        nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(128),
        nn.LeakyReLU(negative_slope=0.2),
        # 16 -> 32
        nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(64),
        nn.LeakyReLU(negative_slope=0.2),
        # 32 -> 64
        nn.ConvTranspose2d(in_channels=64, out_channels=32, kernel_size=4, stride=2, padding=1),
        nn.BatchNorm2d(32),
        nn.LeakyReLU(negative_slope=0.2),
        # Final projection to 3 channels.
        nn.ConvTranspose2d(in_channels=32, out_channels=3, kernel_size=3, stride=1, padding=1),
        # TODO: consider switching to a tanh activation here.
        nn.Sigmoid(),
    ]
    self.model_decoder = nn.Sequential(*decoder)
def __init__(self): super(DCGAN_G, self).__init__() # input shape: (None, 4, 128, 128) self.conv1 = nn.Conv2d(4, 64, kernel_size=4, stride=2, padding=1) self.bn1 = nn.BatchNorm2d(64) self.act1 = nn.ReLU() self.dp1 = nn.Dropout2d(0.3) # input shape: (None, 64, 64, 64) self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1) self.bn2 = nn.BatchNorm2d(128) self.act2 = nn.ReLU() self.dp2 = nn.Dropout2d(0.3) # input shape: (None, 128, 64, 64) self.conv3 = nn.Conv2d(128, 128, kernel_size=4, stride=2, padding=1) self.bn3 = nn.BatchNorm2d(128) self.act3 = nn.ReLU() self.dp3 = nn.Dropout2d(0.3) # input shape: (None, 256, 32, 32) self.conv4 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1) self.bn4 = nn.BatchNorm2d(256) self.act4 = nn.ReLU() self.dp4 = nn.Dropout2d(0.3) # input shape: (None, 256, 32, 32) self.conv5 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) self.bn5 = nn.BatchNorm2d(256) self.act5 = nn.ReLU() self.dp5 = nn.Dropout2d(0.3) # # input shape: (None, 256, 32, 32) self.conv6 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) self.bn6 = nn.BatchNorm2d(256) self.act6 = nn.ReLU() self.dp6 = nn.Dropout2d(0.3) # input shape: (None, 256, 32, 32) self.conv7 = nn.Conv2d(256, 256, kernel_size=3, stride=1, dilation=2, padding=2) self.bn7 = nn.BatchNorm2d(256) self.act7 = nn.ReLU() # input shape: (None, 256, 32, 32) self.conv8 = nn.Conv2d(256, 256, kernel_size=3, stride=1, dilation=4, padding=4) self.bn8 = nn.BatchNorm2d(256) self.act8 = nn.ReLU() # input shape: (None, 256, 32, 32) self.conv9 = nn.Conv2d(256, 256, kernel_size=3, stride=1, dilation=8, padding=8) self.bn9 = nn.BatchNorm2d(256) self.act9 = nn.ReLU() # input shape: (None, 256, 32, 32) self.conv10 = nn.Conv2d(256, 256, kernel_size=3, stride=1, dilation=16, padding=16) self.bn10 = nn.BatchNorm2d(256) self.act10 = nn.ReLU() # input shape: (None, 256, 32, 32) self.conv11 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) self.bn11 = nn.BatchNorm2d(256) self.act11 = 
nn.ReLU() # # input shape: (None, 256, 32, 32) self.conv12 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1) self.bn12 = nn.BatchNorm2d(256) self.act12 = nn.ReLU() # input shape: (None, 256, 32, 32) self.deconv13 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1) self.bn13 = nn.BatchNorm2d(128) self.act13 = nn.ReLU() # input shape: (None, 128, 64, 64) self.conv14 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1) self.bn14 = nn.BatchNorm2d(128) self.act14 = nn.ReLU() self.deconv15 = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1) self.bn15 = nn.BatchNorm2d(64) self.act15 = nn.ReLU() self.conv16 = nn.Conv2d(64, 32, kernel_size=3, stride=1, padding=1) self.bn16 = nn.BatchNorm2d(32) self.act16 = nn.ReLU() self.conv17 = nn.Conv2d(32, 3, kernel_size=3, stride=1, padding=1) self.act17 = nn.Tanh() initialize_weights(self)
def __init__(self, input_dimension, output_dimension, norm=nn.BatchNorm2d, n_residuals=6): super(Generator, self).__init__() use_bias = norm == nn.InstanceNorm2d hidden_dimension = 64 # Make Encoder # Layer1 self.encoder = nn.Sequential( nn.ReflectionPad2d(3), nn.Conv2d(input_dimension, hidden_dimension, 7, 1, 0, bias=use_bias), norm(hidden_dimension), nn.ReLU(inplace=True), nn.ReflectionPad2d(1), nn.Conv2d(hidden_dimension, 2 * hidden_dimension, 3, 2, 0, bias=use_bias), norm(2 * hidden_dimension), nn.ReLU(inplace=True), nn.ReflectionPad2d(1), nn.Conv2d(2 * hidden_dimension, 4 * hidden_dimension, 3, 2, 0, bias=use_bias), norm(4 * hidden_dimension), nn.ReLU(inplace=True), ) # Make Residuals ... transform the image res_layers = [] for _ in range(n_residuals): res_layers.append(residual_block(hidden_dimension, use_bias, norm)) self.residuals = nn.Sequential(*res_layers) # Make Decoder # Decode self.decoder = nn.Sequential( nn.ConvTranspose2d(4 * hidden_dimension, 2 * hidden_dimension, 3, 2, 1, output_padding=1, bias=use_bias), norm(2 * hidden_dimension), nn.ReLU(inplace=True), nn.ConvTranspose2d(2 * hidden_dimension, hidden_dimension, 3, 2, 1, output_padding=1, bias=use_bias), norm(hidden_dimension), nn.ReLU(inplace=True), nn.ReflectionPad2d(3), nn.Conv2d(hidden_dimension, output_dimension, 7, 1, 0), nn.Tanh())
def __init__(self, out_channels=3, use_residuals=True, out_layer=None):
    """U-Net-style decoder with sigmoid-activated double-conv refinement.

    Args:
        out_channels: channels of the final output image.
        use_residuals: if True, each decoderN_conv takes doubled input
            channels (presumably concatenation with encoder skip features
            -- confirm against forward()); otherwise channel widths match
            the deconv outputs directly.
        out_layer: unused in this constructor.
    """
    super(UNetSameDecoder, self).__init__()
    self.use_residuals = use_residuals
    if self.use_residuals:
        # Deconvs double spatial size; each *_conv expects the deconv
        # output concatenated with an equally wide skip tensor.
        self.decoder1 = nn.ConvTranspose2d(in_channels=2048, out_channels=1024, kernel_size=3,
                                           stride=2, padding=1, output_padding=1)
        self.decoder1_conv = nn.Sequential(nn.Conv2d(in_channels=2048, out_channels=1024, kernel_size=3, padding=1),
                                           nn.Conv2d(in_channels=1024, out_channels=1024, kernel_size=3, padding=1),
                                           nn.Sigmoid())
        self.decoder2 = nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3,
                                           stride=2, padding=1, output_padding=1)
        self.decoder2_conv = nn.Sequential(nn.Conv2d(in_channels=1024, out_channels=512, kernel_size=3, padding=1),
                                           nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
                                           nn.Sigmoid())
        self.decoder3 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=3,
                                           stride=2, padding=1, output_padding=1)
        self.decoder3_conv = nn.Sequential(nn.Conv2d(in_channels=512, out_channels=256, kernel_size=3, padding=1),
                                           nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
                                           nn.Sigmoid())
        self.decoder4 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=3,
                                           stride=2, padding=1, output_padding=1)
        # NOTE(review): this stage narrows 256 -> 64 (not 128) so that
        # self.out below can take 64 channels -- confirm intentional.
        self.decoder4_conv = nn.Sequential(nn.Conv2d(in_channels=256, out_channels=64, kernel_size=3, padding=1),
                                           nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
                                           nn.Sigmoid())
        self.out = nn.Sequential(nn.Conv2d(in_channels=64, out_channels=out_channels, kernel_size=3, padding=1),
                                 nn.Sigmoid())
    else:
        # No skip concatenation: *_conv widths equal the deconv outputs.
        self.decoder1 = nn.ConvTranspose2d(in_channels=1024, out_channels=512, kernel_size=3,
                                           stride=2, padding=1, output_padding=1)
        self.decoder1_conv = nn.Sequential(nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
                                           nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, padding=1),
                                           nn.Sigmoid())
        self.decoder2 = nn.ConvTranspose2d(in_channels=512, out_channels=256, kernel_size=3,
                                           stride=2, padding=1, output_padding=1)
        self.decoder2_conv = nn.Sequential(nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
                                           nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, padding=1),
                                           nn.Sigmoid())
        self.decoder3 = nn.ConvTranspose2d(in_channels=256, out_channels=128, kernel_size=3,
                                           stride=2, padding=1, output_padding=1)
        self.decoder3_conv = nn.Sequential(nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
                                           nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, padding=1),
                                           nn.Sigmoid())
        self.decoder4 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=3,
                                           stride=2, padding=1, output_padding=1)
        self.decoder4_conv = nn.Sequential(nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
                                           nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, padding=1),
                                           nn.Sigmoid())
        self.out = nn.Sequential(nn.Conv2d(in_channels=64, out_channels=out_channels, kernel_size=3, padding=1),
                                 nn.Sigmoid())
def __init__(self, width, noise_dim, s_chan, a_chan, conv=True, folds=0): super().__init__() self.width = width self.noise_dim = noise_dim self.s_chan = s_chan self.a_chan = a_chan self.conv = conv if not folds: folds = int(np.log(width/8)/np.log(8)) self.enc_width = int(width/(2**folds)) gen_mods = [ nn.Linear(noise_dim+16*self.enc_width**2, 1024), nn.ReLU(), nn.BatchNorm1d(1024), nn.Linear(1024, self.enc_width**2*16), nn.ReLU(), nn.BatchNorm1d(self.enc_width**2*16), View((-1, 16, self.enc_width, self.enc_width))] if folds==3: gen_mods.extend([ nn.ConvTranspose2d(16, 8, 4, 2, 1), nn.LeakyReLU(0.1), nn.BatchNorm2d(8), nn.ConvTranspose2d(8, 8, 4, 2, 1), nn.LeakyReLU(0.1), nn.BatchNorm2d(8), nn.ConvTranspose2d(8, a_chan, 4, 2, 1)]) if folds==2: gen_mods.extend([ nn.ConvTranspose2d(16, 8, 4, 2, 1), nn.LeakyReLU(0.1), nn.BatchNorm2d(8), nn.ConvTranspose2d(8, a_chan, 4, 2, 1)]) if folds==1: gen_mods.extend([ nn.ConvTranspose2d(16, a_chan, 4, 2, 1)]) self.conv_model = nn.Sequential(*gen_mods) enc_mods = [] if folds==1: enc_mods.extend([nn.Conv2d(s_chan, 16, 4, 2, 1), nn.LeakyReLU(0.1)]) if folds==2: enc_mods.extend([nn.Conv2d(s_chan, 8, 4, 2, 1), nn.LeakyReLU(0.1), nn.Conv2d(8, 16, 4, 2, 1), nn.LeakyReLU(0.1)]) if folds==3: enc_mods.extend([nn.Conv2d(s_chan, 8, 4, 2, 1), nn.LeakyReLU(0.1), nn.Conv2d(8, 16, 4, 2, 1), nn.LeakyReLU(0.1), nn.Conv2d(16, 16, 4, 2, 1), nn.LeakyReLU(0.1)]) enc_mods.extend([nn.BatchNorm2d(16)]) self.encoder = nn.Sequential(*enc_mods) self.lin_model = nn.Sequential( #1 nn.Linear(width**2*s_chan + noise_dim, 2048), nn.ReLU(), nn.BatchNorm1d(2048), #2 nn.Linear(2048, 8*8*16), nn.ReLU(), nn.BatchNorm1d(8*8*16), #3 nn.Linear(8*8*16, width**2 *a_chan), nn.Tanh(), View((-1, a_chan, width, width)) )
def __init__(self, n_classes, use_instance_seg, use_coords, pixel_embedding_dim=16):
    """Segmentation network: CNN backbone + two ReNet layers + deconv
    upsampling, with semantic-logit and (optional) instance-embedding heads.

    Args:
        n_classes: number of semantic classes for sem_seg_output.
        use_instance_seg: if True, also build the instance heads.
        use_coords: forwarded to BaseCNN (coordinate-feature option).
        pixel_embedding_dim: channels of the per-pixel instance embedding.
    """
    super(Architecture, self).__init__()
    self.n_classes = n_classes
    self.use_instance_seg = use_instance_seg
    self.use_coords = use_coords
    self.cnn = BaseCNN(use_coords=self.use_coords)
    # Two stacked ReNet layers; the second consumes the bidirectional
    # (2 * n_units) output of the first.
    self.renet1 = ReNet(n_input=256, n_units=100)
    self.renet2 = ReNet(n_input=100 * 2, n_units=100)
    # 2x upsampling steps; the "+ n_filters" widths suggest concatenation
    # with CNN skip features (confirm against forward()).
    self.upsampling1 = nn.ConvTranspose2d(
        in_channels=100 * 2,
        out_channels=100,
        kernel_size=(2, 2),
        stride=(2, 2),
    )
    self.relu1 = nn.ReLU()
    self.upsampling2 = nn.ConvTranspose2d(
        in_channels=100 + self.cnn.n_filters[1],
        out_channels=100,
        kernel_size=(2, 2),
        stride=(2, 2),
    )
    self.relu2 = nn.ReLU()
    # Per-pixel semantic logits (1x1 conv).
    self.sem_seg_output = nn.Conv2d(in_channels=100 + self.cnn.n_filters[0],
                                    out_channels=self.n_classes,
                                    kernel_size=(1, 1),
                                    stride=(1, 1))
    if self.use_instance_seg:
        # Per-pixel embedding used to group pixels into instances.
        self.ins_seg_output = nn.Conv2d(in_channels=100 + self.cnn.n_filters[0],
                                        out_channels=pixel_embedding_dim,
                                        kernel_size=(1, 1),
                                        stride=(1, 1))
        # NOTE(review): source formatting was ambiguous about whether the
        # ins_cls_* modules below are inside this if-branch; they are
        # instance-related, so they are kept conditional -- confirm.
        # Small CNN that reduces the 2*100-channel ReNet features to a
        # single 64-dim vector (b, 64, 1, 1) via adaptive average pooling.
        self.ins_cls_cnn = nn.Sequential()
        self.ins_cls_cnn.add_module("pool1", nn.MaxPool2d(2, stride=2))
        self.ins_cls_cnn.add_module(
            "conv1",
            nn.Conv2d(in_channels=100 * 2, out_channels=64, kernel_size=(3, 3),
                      stride=(1, 1), padding=(1, 1)))
        self.ins_cls_cnn.add_module("relu1", nn.ReLU())
        self.ins_cls_cnn.add_module(
            "conv2",
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3),
                      stride=(1, 1), padding=(1, 1)))
        self.ins_cls_cnn.add_module("relu2", nn.ReLU())
        self.ins_cls_cnn.add_module("pool2", nn.MaxPool2d(kernel_size=2, stride=2))
        self.ins_cls_cnn.add_module(
            "conv3",
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3),
                      stride=(1, 1), padding=(1, 1)))
        self.ins_cls_cnn.add_module("relu3", nn.ReLU())
        self.ins_cls_cnn.add_module(
            "conv4",
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3, 3),
                      stride=(1, 1), padding=(1, 1)))
        self.ins_cls_cnn.add_module("relu4", nn.ReLU())
        self.ins_cls_cnn.add_module("pool3", nn.AdaptiveAvgPool2d(
            (1, 1)))  # b, nf, 1, 1
        # Scalar sigmoid output on top of the pooled 64-dim vector.
        self.ins_cls_out = nn.Sequential()
        self.ins_cls_out.add_module("linear", nn.Linear(64, 1))
        self.ins_cls_out.add_module("sigmoid", nn.Sigmoid())
def __init__(self, encoder_block, decoder_block, layers, num_classes=1000): self.inplanes = 64 super(ResAE, self).__init__() self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = nn.BatchNorm2d(64) self.relu1 = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, return_indices=True) self.layer1 = self._make_encoder_layer(encoder_block, 64, layers[0]) self.fc_drop1 = nn.Dropout(p=0.5) self.layer2 = self._make_encoder_layer(encoder_block, 128, layers[1], stride=2) self.fc_drop2 = nn.Dropout(p=0.5) self.layer3 = self._make_encoder_layer(encoder_block, 256, layers[2], stride=2) self.fc_drop3 = nn.Dropout(p=0.5) self.layer4 = self._make_encoder_layer(encoder_block, 512, layers[3], stride=2) self.avgpool = nn.AvgPool2d(7) self.fc_drop4 = nn.Dropout(p=0.5) self.fc = nn.Linear(512 * encoder_block.expansion, p_transform["n_labels"]) self.fc_drop5 = nn.Dropout(p=0.5) self.layer5 = self._make_decoder_layer(decoder_block, 512, layers[3]) self.fc_drop6 = nn.Dropout(p=0.5) self.layer6 = self._make_decoder_layer(decoder_block, 256, layers[2], stride=2) self.fc_drop7 = nn.Dropout(p=0.5) self.layer7 = self._make_decoder_layer(decoder_block, 128, layers[1], stride=2) self.fc_drop8 = nn.Dropout(p=0.5) self.layer8 = self._make_decoder_layer(decoder_block, 64, layers[0], stride=2) self.max_unpool = nn.MaxUnpool2d(kernel_size=3, stride=2, padding=1) self.deconv1 = nn.ConvTranspose2d(64, 64, 7, stride=2, padding=2, output_padding=3, bias=False) self.bn2 = nn.BatchNorm2d(64) self.relu2 = nn.ReLU(inplace=True) self.c1_conv = nn.Conv2d(64, p_transform['channels'], kernel_size=1, stride=1, padding=0, bias=False) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_()
def __init__(self, num_convs=4, roi_feat_size=14, in_channels=256, conv_kernel_size=3, conv_out_channels=256, upsample_method='deconv', upsample_ratio=2, num_classes=81, class_agnostic=False, normalize=None): super(FCNMaskHead, self).__init__() # ipdb.set_trace() # 提供三种上采样方法:转置卷积/最近邻插值/双线性插值 if upsample_method not in [None, 'deconv', 'nearest', 'bilinear']: raise ValueError( 'Invalid upsample method {}, accepted methods ' 'are "deconv", "nearest", "bilinear"'.format(upsample_method)) self.num_convs = num_convs self.roi_feat_size = roi_feat_size # WARN: not used and reserved self.in_channels = in_channels self.conv_kernel_size = conv_kernel_size self.conv_out_channels = conv_out_channels self.upsample_method = upsample_method self.upsample_ratio = upsample_ratio self.num_classes = num_classes self.class_agnostic = class_agnostic self.normalize = normalize self.with_bias = normalize is None self.convs = nn.ModuleList() # Mask RCNN用4组3*3卷积对detection经过RoiAlign后的结果重组特征 for i in range(self.num_convs): in_channels = (self.in_channels if i == 0 else self.conv_out_channels) padding = (self.conv_kernel_size - 1) // 2 self.convs.append( ConvModule( # 基本的卷积模块:conv + norm + activation in_channels, self.conv_out_channels, self.conv_kernel_size, padding=padding, normalize=normalize, bias=self.with_bias)) if self.upsample_method is None: self.upsample = None elif self.upsample_method == 'deconv': self.upsample = nn.ConvTranspose2d( # 加入转置卷积层上采样 self.conv_out_channels, self.conv_out_channels, self.upsample_ratio, stride=self.upsample_ratio) else: self.upsample = nn.Upsample( # 最近邻/双线性插值上采样 scale_factor=self.upsample_ratio, mode=self.upsample_method) # 不分物体的mask只输出一个通道,如iou-net和mask-scoring rcnn # 进行区分产生mask时,按照所有的cls_num各产生一个mask out_channels = 1 if self.class_agnostic else self.num_classes self.conv_logits = nn.Conv2d(self.conv_out_channels, out_channels, 1) self.relu = nn.ReLU(inplace=True) self.debug_imgs = None
def __init__(self, input_nc, ngf=64, norm_layer=nn.BatchNorm2d, use_dropout=False, n_stages=4,
             layers=[2, 2, 2, 2]):
    """Construct a Restep-based generator: a strided stem, four stages of
    RestepBlocks with stride-2 transitions, then four deconv upsamples and
    a 3-channel output conv.

    Parameters:
        input_nc (int)     -- the number of channels in input images
        ngf (int)          -- the number of filters after the stem conv
        norm_layer         -- normalization layer (class or functools.partial)
        use_dropout (bool) -- NOTE(review): accepted but not used in this
                              constructor
        n_stages (int)     -- asserted non-negative; not otherwise used here
        layers (list)      -- number of RestepBlocks per stage
    """
    assert (n_stages >= 0)
    super(RestepGenerator, self).__init__()
    # InstanceNorm has no learned shift, so convs would need their own bias.
    if type(norm_layer) == functools.partial:
        use_bias = norm_layer.func == nn.InstanceNorm2d
    else:
        use_bias = norm_layer == nn.InstanceNorm2d
    # Stem: 7x7 stride-2 conv.
    # NOTE(review): bn1 and the stage/deconv norms below are hard-coded to
    # nn.BatchNorm2d even when norm_layer is InstanceNorm -- confirm intended.
    self.conv1 = nn.Conv2d(input_nc, ngf, kernel_size=7, stride=2, padding=3)
    self.bn1 = nn.BatchNorm2d(ngf)
    self.relu = nn.ReLU(inplace=True)
    # stage 1 -- the last block in each stage gets True for its third arg
    # (presumably a channel-expanding/transition variant of RestepBlock,
    # since the next stage consumes doubled channels -- confirm).
    self.stage_1 = []
    for layer in range(layers[0]):
        if layer != layers[0] - 1:
            self.stage_1.append(
                RestepBlock(ngf, norm_layer, False, use_bias))
        else:
            self.stage_1.append(
                RestepBlock(ngf, norm_layer, True, use_bias))
    self.stage_1 = nn.Sequential(*self.stage_1)
    # stage 2: stride-2 transition conv then RestepBlocks at 2*ngf.
    self.stage_2 = [
        nn.Conv2d(ngf * 2, ngf * 2, 3, 2, 1),
        nn.BatchNorm2d(ngf * 2),
        nn.ReLU(inplace=True)
    ]
    for layer in range(layers[1]):
        if layer != layers[1] - 1:
            self.stage_2.append(
                RestepBlock(ngf * 2, norm_layer, False, use_bias))
        else:
            self.stage_2.append(
                RestepBlock(ngf * 2, norm_layer, True, use_bias))
    self.stage_2 = nn.Sequential(*self.stage_2)
    # stage 3: stride-2 transition conv then RestepBlocks at 4*ngf.
    self.stage_3 = [
        nn.Conv2d(ngf * 4, ngf * 4, 3, 2, 1),
        nn.BatchNorm2d(ngf * 4),
        nn.ReLU(inplace=True)
    ]
    for layer in range(layers[2]):
        if layer != layers[2] - 1:
            self.stage_3.append(
                RestepBlock(ngf * 4, norm_layer, False, use_bias))
        else:
            self.stage_3.append(
                RestepBlock(ngf * 4, norm_layer, True, use_bias))
    self.stage_3 = nn.Sequential(*self.stage_3)
    # stage 4: stride-2 transition conv then RestepBlocks at 8*ngf.
    self.stage_4 = [
        nn.Conv2d(ngf * 8, ngf * 8, 3, 2, 1),
        nn.BatchNorm2d(ngf * 8),
        nn.ReLU(inplace=True)
    ]
    for layer in range(layers[3]):
        if layer != layers[3] - 1:
            self.stage_4.append(
                RestepBlock(ngf * 8, norm_layer, False, use_bias))
        else:
            self.stage_4.append(
                RestepBlock(ngf * 8, norm_layer, True, use_bias))
    self.stage_4 = nn.Sequential(*self.stage_4)
    # Decoder: four stride-2 deconvs halving the channel count each time.
    self.deconv1 = nn.ConvTranspose2d(in_channels=ngf * 16, out_channels=ngf * 8, kernel_size=3,
                                      stride=2, padding=1, output_padding=1)
    self.de_bn1 = nn.BatchNorm2d(ngf * 8)
    self.deconv2 = nn.ConvTranspose2d(in_channels=ngf * 8, out_channels=ngf * 4, kernel_size=3,
                                      stride=2, padding=1, output_padding=1)
    self.de_bn2 = nn.BatchNorm2d(ngf * 4)
    self.deconv3 = nn.ConvTranspose2d(in_channels=ngf * 4, out_channels=ngf * 2, kernel_size=3,
                                      stride=2, padding=1, output_padding=1)
    self.de_bn3 = nn.BatchNorm2d(ngf * 2)
    self.deconv4 = nn.ConvTranspose2d(in_channels=ngf * 2, out_channels=ngf, kernel_size=3,
                                      stride=2, padding=1, output_padding=1)
    self.de_bn4 = nn.BatchNorm2d(ngf)
    # Final 3-channel projection.
    self.last_layer = nn.Conv2d(in_channels=ngf, out_channels=3, kernel_size=3, padding=1)