OPS = {
    'none':         lambda C, stride, affine: Zero(stride),
    'avg_pool_3x3': lambda C, stride, affine: nn.AvgPool2d(3, stride=stride, padding=1, count_include_pad=False),
    'max_pool_3x3': lambda C, stride, affine: nn.MaxPool2d(3, stride=stride, padding=1),
    'skip_connect': lambda C, stride, affine: Identity() if stride == 1 else FactorizedReduce(C, C, affine=affine),
    'sep_conv_3x3': lambda C, stride, affine: SepConv(C, C, 3, stride, 1, affine=affine),
    'sep_conv_5x5': lambda C, stride, affine: SepConv(C, C, 5, stride, 2, affine=affine),
    'sep_conv_7x7': lambda C, stride, affine: SepConv(C, C, 7, stride, 3, affine=affine),
    'dil_conv_3x3': lambda C, stride, affine: DilConv(C, C, 3, stride, 2, 2, affine=affine),
    'dil_conv_5x5': lambda C, stride, affine: DilConv(C, C, 5, stride, 4, 2, affine=affine),
    'conv_7x1_1x7': lambda C, stride, affine: nn.Sequential(
        nn.ReLU(inplace=False),
        nn.Conv2d(C, C, (1, 7), stride=(1, stride), padding=(0, 3), bias=False),
        nn.Conv2d(C, C, (7, 1), stride=(stride, 1), padding=(3, 0), bias=False),
        nn.BatchNorm2d(C, affine=affine)
    ),
}


class ReLUConvBN(nn.Module):

    def __init__(self, C_in, C_out, kernel_size, stride, padding, affine=True):
        super(ReLUConvBN, self).__init__()
        self.op = nn.Sequential(
            nn.ReLU(inplace=False),
            nn.Conv2d(C_in, C_out, kernel_size, stride=stride, padding=padding, bias=False),
            nn.BatchNorm2d(C_out, affine=affine)
        )

    def forward(self, x):
        return self.op(x)
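# A minimal usage sketch (illustrative, not from the original source): each OPS
# entry is a factory taking (C, stride, affine). It assumes the DARTS-style
# helper modules referenced above (Zero, Identity, FactorizedReduce, SepConv,
# DilConv) are defined elsewhere in this file.
import torch

op = OPS['sep_conv_3x3'](16, 1, True)  # C=16 channels, stride 1, affine BN
x = torch.randn(2, 16, 32, 32)
print(op(x).shape)                     # torch.Size([2, 16, 32, 32]); stride 1 preserves HxW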
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
    super(Conv, self).__init__()
    self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
    self.bn = nn.BatchNorm2d(c2)
    self.act = nn.Hardswish() if act else nn.Identity()
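# `autopad` is referenced above but not defined in this snippet. A common
# implementation (matching YOLOv5's helper) pads to 'same' output size at
# stride 1 by defaulting the padding to half the kernel size:
def autopad(k, p=None):  # kernel, padding
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p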
def conv_3x3_bn(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )
def __init__(self, num_classes, input_size, k=7):
    super(GCN_COMBINED, self).__init__()

    self.K = k
    self.input_size = input_size

    dense161_path = os.path.join(pretrained_dir, 'densenet161.pth')
    densenet = models.densenet161()
    densenet.load_state_dict(torch.load(dense161_path))

    self.layer0 = nn.Sequential(
        densenet.features.conv0,
        densenet.features.norm0,
        densenet.features.relu0,
    )
    self.layer1 = nn.Sequential(
        densenet.features.pool0,
        densenet.features.denseblock1,
    )
    self.layer2 = nn.Sequential(
        densenet.features.transition1,
        densenet.features.denseblock2,
    )
    self.layer3 = nn.Sequential(
        densenet.features.transition2,
        densenet.features.denseblock3,
    )
    self.layer4 = nn.Sequential(
        densenet.features.transition3,
        densenet.features.denseblock4,
    )

    self.gcm1 = _GlobalConvModule(2208, num_classes, (self.K, self.K))
    self.gcm2 = _GlobalConvModule(2112, num_classes, (self.K, self.K))
    self.gcm3 = _GlobalConvModule(768, num_classes, (self.K, self.K))
    self.gcm4 = _GlobalConvModule(384, num_classes, (self.K, self.K))

    self.brm1 = _BoundaryRefineModule(num_classes)
    self.brm2 = _BoundaryRefineModule(num_classes)
    self.brm3 = _BoundaryRefineModule(num_classes)
    self.brm4 = _BoundaryRefineModule(num_classes)
    self.brm5 = _BoundaryRefineModule(num_classes)
    self.brm6 = _BoundaryRefineModule(num_classes)
    self.brm7 = _BoundaryRefineModule(num_classes)
    self.brm8 = _BoundaryRefineModule(num_classes)
    self.brm9 = _BoundaryRefineModule(num_classes)

    self.deconv = _LearnedBilinearDeconvModule(num_classes)
    self.psp = _PyramidPoolingModule(num_classes, 12, input_size, levels=(1, 2, 3, 6, 9))
    self.final = nn.Sequential(
        nn.Conv2d(num_classes + self.psp.out_channels, num_classes, kernel_size=3, padding=1),
        nn.BatchNorm2d(num_classes),
        nn.ReLU(inplace=True),
        nn.Conv2d(num_classes, num_classes, kernel_size=1, padding=0))

    initialize_weights(self.gcm1, self.gcm2, self.gcm3, self.gcm4,
                       self.brm1, self.brm2, self.brm3, self.brm4, self.brm5,
                       self.brm6, self.brm7, self.brm8, self.brm9)
    initialize_weights(self.psp, self.final)
def __init__(self):
    super(VGG, self).__init__()
    # 16
    self.layer_1 = nn.Sequential(
        nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(64),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))
    self.layer_2 = nn.Sequential(
        nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(128),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))
    # 8
    self.layer_3 = nn.Sequential(
        nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(256),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))
    self.layer_4 = nn.Sequential(
        nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))
    # 4
    self.layer_5 = nn.Sequential(
        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1),
        nn.BatchNorm2d(512),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2))

    self.relu = nn.ReLU(inplace=True)
    self.deconv1 = nn.ConvTranspose2d(512, 512, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
    self.bn1 = nn.BatchNorm2d(512)
    self.deconv2 = nn.ConvTranspose2d(512, 256, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
    self.bn2 = nn.BatchNorm2d(256)
    self.deconv3 = nn.ConvTranspose2d(256, 128, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
    self.bn3 = nn.BatchNorm2d(128)
    self.deconv4 = nn.ConvTranspose2d(128, 64, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
    self.bn4 = nn.BatchNorm2d(64)
    self.deconv5 = nn.ConvTranspose2d(64, 32, kernel_size=3, stride=2, padding=1, dilation=1, output_padding=1)
    self.bn5 = nn.BatchNorm2d(32)
    self.classifier = nn.Conv2d(32, 21, kernel_size=1)
def __init__(self, in_plane, plane, kernel_size=3, padding=1, stride=1, atrous=1):
    super(Block, self).__init__()
    # NOTE: the `atrous` argument is accepted but unused; dilation is hardcoded to 1 below.
    self.conv1 = nn.Conv2d(in_plane, plane, kernel_size=kernel_size, padding=padding,
                           stride=stride, dilation=1)
    self.bn1 = nn.BatchNorm2d(plane)
    self.relu1 = nn.ReLU(inplace=True)
blk = Residual(3, 3)
X = torch.rand((4, 3, 6, 6))
print(blk(X).shape)

# We can also halve the output height and width while increasing the number of output channels.
blk = Residual(3, 6, use_1x1conv=True, stride=2)
print(blk(X).shape)
print("*" * 50)

# The ResNet model
net = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1))


def resnet_block(in_channels, out_channels, num_residuals, first_block=False):
    # num_residuals: number of residual blocks in this stage
    if first_block:
        assert in_channels == out_channels  # the first stage keeps the input channel count
    blk = []
    for i in range(num_residuals):
        if i == 0 and not first_block:
            blk.append(Residual(in_channels, out_channels, use_1x1conv=True, stride=2))
        else:
            blk.append(Residual(out_channels, out_channels))
    return nn.Sequential(*blk)
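# `Residual` is not defined in this snippet. A minimal implementation
# consistent with the calls above (d2l-style; the optional 1x1 conv matches
# channels and stride on the shortcut path) would look like this:
import torch.nn.functional as F

class Residual(nn.Module):
    def __init__(self, in_channels, out_channels, use_1x1conv=False, stride=1):
        super(Residual, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride) if use_1x1conv else None
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def forward(self, X):
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.conv3 is not None:
            X = self.conv3(X)  # project the shortcut to the new shape
        return F.relu(Y + X)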
def conv_bn(inp, oup, stride=1, leaky=0):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.LeakyReLU(negative_slope=leaky, inplace=True))
def __init__(self, image_channels, num_classes):
    """
    Is called when the model is initialized.
    Args:
        image_channels: Number of color channels in image (3)
        num_classes: Number of classes we want to predict (10)
    """
    super().__init__()
    # TODO: Implement this function (Task 2a)
    num_filters = 32  # Number of filters in the first conv layer
    self.num_classes = num_classes
    # Define the convolutional layers
    self.feature_extractor = nn.Sequential(
        # [32 x 32 x 3]
        nn.Conv2d(in_channels=image_channels, out_channels=num_filters, kernel_size=5, stride=1, padding=2),
        nn.LeakyReLU(0.01),
        nn.BatchNorm2d(num_filters),
        nn.Conv2d(in_channels=num_filters, out_channels=num_filters, kernel_size=5, stride=1, padding=2),
        nn.LeakyReLU(0.01),
        nn.BatchNorm2d(num_filters),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(p=0.2),
        # [16 x 16 x 32]
        nn.Conv2d(in_channels=num_filters, out_channels=64, kernel_size=5, stride=1, padding=2),
        nn.LeakyReLU(0.01),
        nn.BatchNorm2d(64),
        nn.Conv2d(in_channels=64, out_channels=64, kernel_size=5, stride=1, padding=2),
        nn.LeakyReLU(0.01),
        nn.BatchNorm2d(64),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(p=0.2),
        # [8 x 8 x 64]
        nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=2),
        nn.LeakyReLU(0.01),
        nn.BatchNorm2d(128),
        nn.Conv2d(in_channels=128, out_channels=128, kernel_size=5, stride=1, padding=2),
        nn.LeakyReLU(0.01),
        nn.BatchNorm2d(128),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(p=0.2))
    # The output of feature_extractor is [batch_size, 128, 4, 4]
    self.num_output_features = 4 * 4 * 128
    # Initialize the fully connected classifier head.
    # It takes all extracted features from the convolutional layers
    # and outputs num_classes predictions, one per class.
    # No softmax activation is needed here, as it is included in nn.CrossEntropyLoss.
    self.classifier = nn.Sequential(
        nn.Linear(self.num_output_features, 64),
        nn.LeakyReLU(0.01),
        nn.BatchNorm1d(64),
        nn.Dropout(p=0.2),
        nn.Linear(64, num_classes))
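# The forward pass is not included above; a minimal sketch consistent with
# self.num_output_features = 4 * 4 * 128 (three 2x2 max-pools take 32x32 down to 4x4):
def forward(self, x):
    x = self.feature_extractor(x)  # [batch_size, 128, 4, 4]
    x = x.view(x.size(0), -1)      # flatten to [batch_size, 4*4*128]
    return self.classifier(x)      # [batch_size, num_classes]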
def __init__(self, in_planes, n1x1, n3x3red, n3x3, n5x5red, n5x5, pool_planes,
             honey_rate, tmp_name):
    super(Inception, self).__init__()
    self.honey_rate = honey_rate
    self.tmp_name = tmp_name

    self.n1x1 = n1x1
    self.n3x3 = n3x3
    self.n5x5 = n5x5
    self.pool_planes = pool_planes

    # 1x1 conv branch
    if self.n1x1:
        conv1x1 = nn.Conv2d(in_planes, n1x1, kernel_size=1)
        conv1x1.tmp_name = self.tmp_name
        self.branch1x1 = nn.Sequential(
            conv1x1,
            nn.BatchNorm2d(n1x1),
            nn.ReLU(True),
        )

    # 1x1 conv -> 3x3 conv branch
    if self.n3x3:
        conv3x3_1 = nn.Conv2d(in_planes, int(n3x3red * self.honey_rate / 10), kernel_size=1)
        conv3x3_2 = nn.Conv2d(int(n3x3red * self.honey_rate / 10), n3x3, kernel_size=3, padding=1)
        conv3x3_1.tmp_name = self.tmp_name
        conv3x3_2.tmp_name = self.tmp_name
        self.branch3x3 = nn.Sequential(
            conv3x3_1,
            nn.BatchNorm2d(int(n3x3red * self.honey_rate / 10)),
            nn.ReLU(True),
            conv3x3_2,
            nn.BatchNorm2d(n3x3),
            nn.ReLU(True),
        )

    # 1x1 conv -> 5x5 conv branch (factorized here as two stacked 3x3 convs)
    if self.n5x5 > 0:
        conv5x5_1 = nn.Conv2d(in_planes, int(n5x5red * self.honey_rate / 10), kernel_size=1)
        conv5x5_2 = nn.Conv2d(int(n5x5red * self.honey_rate / 10), int(n5x5 * self.honey_rate / 10),
                              kernel_size=3, padding=1)
        conv5x5_3 = nn.Conv2d(int(n5x5 * self.honey_rate / 10), n5x5, kernel_size=3, padding=1)
        conv5x5_1.tmp_name = self.tmp_name
        conv5x5_2.tmp_name = self.tmp_name
        conv5x5_3.tmp_name = self.tmp_name
        self.branch5x5 = nn.Sequential(
            conv5x5_1,
            nn.BatchNorm2d(int(n5x5red * self.honey_rate / 10)),
            nn.ReLU(True),
            conv5x5_2,
            nn.BatchNorm2d(int(n5x5 * self.honey_rate / 10)),
            nn.ReLU(True),
            conv5x5_3,
            nn.BatchNorm2d(n5x5),
            nn.ReLU(True),
        )

    # 3x3 pool -> 1x1 conv branch
    if self.pool_planes > 0:
        conv_pool = nn.Conv2d(in_planes, pool_planes, kernel_size=1)
        conv_pool.tmp_name = self.tmp_name
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(3, stride=1, padding=1),
            conv_pool,
            nn.BatchNorm2d(pool_planes),
            nn.ReLU(True),
        )
def conv_bn_no_relu(inp, oup, stride):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
    )
def __init__(self, dim):
    super(IBNBlock, self).__init__()
    self.dim_half = dim // 2
    self.batchnorm = nn.BatchNorm2d(self.dim_half)
    self.instancenorm = nn.InstanceNorm2d(self.dim_half)
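# IBNBlock's forward is not shown. In IBN-a style blocks the channels are split
# in half, with instance norm on one half and batch norm on the other; a sketch
# consistent with the attributes above (the exact split order is an assumption):
def forward(self, x):
    split = torch.split(x, self.dim_half, dim=1)
    out1 = self.instancenorm(split[0].contiguous())  # first half: instance norm
    out2 = self.batchnorm(split[1].contiguous())     # second half: batch norm
    return torch.cat((out1, out2), dim=1)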
def create_network(self, blocks):
    models = nn.ModuleList()

    prev_filters = 3
    out_filters = []
    conv_id = 0
    for block in blocks:
        if block['type'] == 'net':
            prev_filters = int(block['channels'])
            continue
        elif block['type'] == 'convolutional':
            conv_id = conv_id + 1
            batch_normalize = int(block['batch_normalize'])
            filters = int(block['filters'])
            kernel_size = int(block['size'])
            stride = int(block['stride'])
            is_pad = int(block['pad'])
            pad = (kernel_size - 1) // 2 if is_pad else 0
            activation = block['activation']
            model = nn.Sequential()
            if batch_normalize:
                model.add_module('conv{0}'.format(conv_id),
                                 nn.Conv2d(prev_filters, filters, kernel_size, stride, pad, bias=False))
                model.add_module('bn{0}'.format(conv_id), nn.BatchNorm2d(filters, eps=1e-4))
                # model.add_module('bn{0}'.format(conv_id), BN2d(filters))
            else:
                model.add_module('conv{0}'.format(conv_id),
                                 nn.Conv2d(prev_filters, filters, kernel_size, stride, pad))
            if activation == 'leaky':
                model.add_module('leaky{0}'.format(conv_id), nn.LeakyReLU(0.1, inplace=True))
            elif activation == 'relu':
                model.add_module('relu{0}'.format(conv_id), nn.ReLU(inplace=True))
            prev_filters = filters
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'maxpool':
            pool_size = int(block['size'])
            stride = int(block['stride'])
            if stride > 1:
                model = nn.MaxPool2d(pool_size, stride)
            else:
                model = MaxPoolStride1()
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'avgpool':
            model = GlobalAvgPool2d()
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'softmax':
            model = nn.Softmax()
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'cost':
            # size_average=True is deprecated; reduction='mean' is equivalent
            if block['_type'] == 'sse':
                model = nn.MSELoss(reduction='mean')
            elif block['_type'] == 'L1':
                model = nn.L1Loss(reduction='mean')
            elif block['_type'] == 'smooth':
                model = nn.SmoothL1Loss(reduction='mean')
            out_filters.append(1)
            models.append(model)
        elif block['type'] == 'reorg':
            stride = int(block['stride'])
            prev_filters = stride * stride * prev_filters
            out_filters.append(prev_filters)
            models.append(Reorg(stride))
        elif block['type'] == 'route':
            layers = block['layers'].split(',')
            ind = len(models)
            layers = [int(i) if int(i) > 0 else int(i) + ind for i in layers]
            if len(layers) == 1:
                prev_filters = out_filters[layers[0]]
            elif len(layers) == 2:
                assert layers[0] == ind - 1
                prev_filters = out_filters[layers[0]] + out_filters[layers[1]]
            out_filters.append(prev_filters)
            models.append(EmptyModule())
        elif block['type'] == 'shortcut':
            ind = len(models)
            prev_filters = out_filters[ind - 1]
            out_filters.append(prev_filters)
            models.append(EmptyModule())
        elif block['type'] == 'connected':
            filters = int(block['output'])
            if block['activation'] == 'linear':
                model = nn.Linear(prev_filters, filters)
            elif block['activation'] == 'leaky':
                model = nn.Sequential(
                    nn.Linear(prev_filters, filters),
                    nn.LeakyReLU(0.1, inplace=True))
            elif block['activation'] == 'relu':
                model = nn.Sequential(
                    nn.Linear(prev_filters, filters),
                    nn.ReLU(inplace=True))
            prev_filters = filters
            out_filters.append(prev_filters)
            models.append(model)
        elif block['type'] == 'region':
            loss = RegionLoss()
            anchors = block['anchors'].split(',')
            if anchors == ['']:
                loss.anchors = []
            else:
                loss.anchors = [float(i) for i in anchors]
            loss.num_classes = int(block['classes'])
            loss.num_anchors = int(block['num'])
            loss.anchor_step = len(loss.anchors) // loss.num_anchors
            loss.object_scale = float(block['object_scale'])
            loss.noobject_scale = float(block['noobject_scale'])
            loss.class_scale = float(block['class_scale'])
            loss.coord_scale = float(block['coord_scale'])
            out_filters.append(prev_filters)
            models.append(loss)
        else:
            print('unknown type %s' % (block['type']))

    return models
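# A hypothetical `blocks` input (normally produced by a Darknet .cfg parser).
# The dict keys and string values below match exactly what create_network reads:
blocks = [
    {'type': 'net', 'channels': '3'},
    {'type': 'convolutional', 'batch_normalize': '1', 'filters': '16',
     'size': '3', 'stride': '1', 'pad': '1', 'activation': 'leaky'},
    {'type': 'maxpool', 'size': '2', 'stride': '2'},
]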
def test_batchnorm_training(self):
    # torch.autograd.Variable is deprecated; a plain tensor with requires_grad is equivalent.
    x = torch.ones(2, 2, 2, 2, requires_grad=True)
    self.assertONNX(nn.BatchNorm2d(2), x, training=True)
def up_pooling(in_channels, out_channels, kernel_size=2, stride=2):
    return nn.Sequential(
        nn.ConvTranspose2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )
def __init__(self, in_ch=3, out_ch=3, dirate=1):
    super(REBNCONV, self).__init__()
    # For a 3x3 kernel, setting padding equal to the dilation keeps the spatial size unchanged.
    self.conv_s1 = nn.Conv2d(in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate)
    self.bn_s1 = nn.BatchNorm2d(out_ch)
    self.relu_s1 = nn.ReLU(inplace=True)
def __init__(self):
    super(TestModel, self).__init__()
    self.conv = nn.Conv2d(3, 10, 5)
    self.bn = nn.BatchNorm2d(10)
    self.relu = nn.ReLU()
def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1,
             groups=1, relu=True, bn=True, bias=False):
    super(BasicConv, self).__init__()
    self.out_channels = out_planes
    self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
                          padding=padding, dilation=dilation, groups=groups, bias=bias)
    self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True) if bn else None
    self.relu = nn.PReLU() if relu else None
def _conv2d1x1(in_channels, out_channels, stride=1):
    """1x1 convolution for contraction and expansion of the channel dimension;
    the conv is followed by batch norm."""
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
        nn.BatchNorm2d(out_channels))
def __init__(self, heads, head_conv, n_class=1000, input_size=224, width_mult=1.):
    super(ShuffleNetV2, self).__init__()
    self.inplanes = 24
    self.deconv_with_bias = False

    assert input_size % 32 == 0

    self.stage_repeats = [4, 8, 4]
    # self.stage_repeats = [2, 3, 2]
    # index 0 is invalid and should never be called.
    # only used for indexing convenience.
    if width_mult == 0.5:
        self.stage_out_channels = [-1, 24, 48, 96, 192, 1024]
    elif width_mult == 1.0:
        self.stage_out_channels = [-1, 24, 116, 232, 464, 512]
    elif width_mult == 1.5:
        self.stage_out_channels = [-1, 24, 176, 352, 704, 1024]
    elif width_mult == 2.0:
        self.stage_out_channels = [-1, 24, 224, 488, 976, 2048]
    else:
        raise ValueError("width_mult {} is not supported".format(width_mult))

    # building first layer
    input_channel = self.stage_out_channels[1]
    self.conv1 = conv_bn(3, input_channel, 2)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    self.features = []
    # building inverted residual blocks
    for idxstage in range(len(self.stage_repeats)):
        # self.features.append(SELayer(input_channel))
        numrepeat = self.stage_repeats[idxstage]
        output_channel = self.stage_out_channels[idxstage + 2]
        for i in range(numrepeat):
            # InvertedResidual(inp, oup, stride, benchmodel)
            if i == 0:
                self.features.append(InvertedResidual(input_channel, output_channel, 2, 2))
            else:
                self.features.append(InvertedResidual(input_channel, output_channel, 1, 1))
            input_channel = output_channel
            self.inplanes = output_channel

    # make it nn.Sequential
    self.features = nn.Sequential(*self.features)

    # building the last several layers
    # self.conv_last = conv_1x1_bn(input_channel, self.stage_out_channels[-1])
    self.conv_final = conv_1x1_bn(input_channel, self.stage_out_channels[-1])
    self.inplanes = self.stage_out_channels[-1]

    self.cem1 = nn.Sequential(SELayer(116), conv_1x1_bn(116, 128))
    self.cem2 = nn.Sequential(
        SELayer(232),
        nn.ConvTranspose2d(in_channels=232, out_channels=128, kernel_size=1, stride=2,
                           padding=0, output_padding=1, bias=self.deconv_with_bias),
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True))
    self.cem3 = self._make_deconv_layer(2, [256, 128], [1, 1])
    self.cemse = SELayer(128)

    # add heads
    self.heads = heads
    for head in self.heads:
        classes = self.heads[head]
        if head_conv > 0:
            fc = nn.Sequential(
                nn.Conv2d(self.inplanes, head_conv, kernel_size=3, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(head_conv, classes, kernel_size=1, stride=1, padding=0, bias=True))
            if 'hm' in head:
                fc[-1].bias.data.fill_(-2.19)
            else:
                fill_fc_weights(fc)
        else:
            fc = nn.Conv2d(self.inplanes, classes, kernel_size=1, stride=1, padding=0, bias=True)
            if 'hm' in head:
                fc.bias.data.fill_(-2.19)
            else:
                fill_fc_weights(fc)
        self.__setattr__(head, fc)
def _make_fc(self, inplanes, outplanes):
    conv = nn.Conv2d(inplanes, outplanes, kernel_size=1)
    bn = nn.BatchNorm2d(outplanes)
    return nn.Sequential(conv, bn, self.relu)
def __init__(self, in_planes, out_planes, kernel_size, stride, padding=0):
    super(BasicConv2d, self).__init__()
    self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size,
                          stride=stride, padding=padding, bias=False)  # verify bias false
    self.bn = nn.BatchNorm2d(out_planes, eps=0.001, momentum=0, affine=True)
    self.relu = nn.ReLU(inplace=True)
def __init__(self, input_size, output_size, conv_num=1, criterion=nn.MSELoss(), learning_rate=0.01):
    super(CDAutoEncoder, self).__init__()
    if conv_num == 2:
        self.forward_pass = nn.Sequential(
            nn.Conv2d(input_size, output_size, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(output_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(inplace=True),
            nn.Conv2d(output_size, output_size, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(output_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False))
        self.backward_pass = nn.Sequential(
            nn.ConvTranspose2d(output_size, output_size, kernel_size=(2, 2), stride=(2, 2)),
            nn.BatchNorm2d(output_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(output_size, input_size, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(input_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(inplace=True))
    if conv_num == 1:
        self.forward_pass = nn.Sequential(
            nn.Conv2d(input_size, output_size, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)),
            nn.BatchNorm2d(output_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False))
        self.backward_pass = nn.Sequential(
            nn.ConvTranspose2d(output_size, input_size, kernel_size=(2, 2), stride=(2, 2)),
            nn.BatchNorm2d(input_size, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True),
            nn.ReLU(inplace=True))

    self.criterion = criterion
    self.optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate, momentum=0.9)
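# A hypothetical greedy training step for one CDAutoEncoder stage (not from the
# original source): encode, decode, and update this stage's own optimizer on
# the reconstruction loss, passing the detached code on to the next stage.
def train_step(ae, x):
    y = ae.forward_pass(x)        # encode: conv -> BN -> ReLU -> 2x2 max-pool
    x_rec = ae.backward_pass(y)   # decode back to the input shape
    loss = ae.criterion(x_rec, x)
    ae.optimizer.zero_grad()
    loss.backward()
    ae.optimizer.step()
    return y.detach(), loss.item()  # detached code feeds the next stage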
def convbn_2d_lrelu(in_planes, out_planes, kernel_size, stride, pad, dilation=1, bias=False):
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, kernel_size=(kernel_size, kernel_size),
                  stride=(stride, stride), padding=(pad, pad),
                  dilation=(dilation, dilation), bias=bias),
        nn.BatchNorm2d(out_planes),
        nn.LeakyReLU(0.1, inplace=True))
def __init__(
    self, n, nstack, dims, modules, heads, pre=None, cnv_dim=256,
    make_tl_layer=None, make_br_layer=None,
    make_cnv_layer=make_cnv_layer, make_heat_layer=make_kp_layer,
    make_tag_layer=make_kp_layer, make_regr_layer=make_kp_layer,
    make_up_layer=make_layer, make_low_layer=make_layer,
    make_hg_layer=make_layer, make_hg_layer_revr=make_layer_revr,
    make_pool_layer=make_pool_layer, make_unpool_layer=make_unpool_layer,
    make_merge_layer=make_merge_layer, make_inter_layer=make_inter_layer,
    kp_layer=residual
):
    super(exkp, self).__init__()

    self.nstack = nstack
    self.heads = heads

    curr_dim = dims[0]

    self.pre = nn.Sequential(
        convolution(7, 3, 128, stride=2),
        residual(3, 128, 256, stride=2)
    ) if pre is None else pre

    self.kps = nn.ModuleList([
        kp_module(
            n, dims, modules, layer=kp_layer,
            make_up_layer=make_up_layer,
            make_low_layer=make_low_layer,
            make_hg_layer=make_hg_layer,
            make_hg_layer_revr=make_hg_layer_revr,
            make_pool_layer=make_pool_layer,
            make_unpool_layer=make_unpool_layer,
            make_merge_layer=make_merge_layer
        ) for _ in range(nstack)
    ])
    self.cnvs = nn.ModuleList([
        make_cnv_layer(curr_dim, cnv_dim) for _ in range(nstack)
    ])

    self.inters = nn.ModuleList([
        make_inter_layer(curr_dim) for _ in range(nstack - 1)
    ])

    self.inters_ = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(curr_dim, curr_dim, (1, 1), bias=False),
            nn.BatchNorm2d(curr_dim)
        ) for _ in range(nstack - 1)
    ])
    self.cnvs_ = nn.ModuleList([
        nn.Sequential(
            nn.Conv2d(cnv_dim, curr_dim, (1, 1), bias=False),
            nn.BatchNorm2d(curr_dim)
        ) for _ in range(nstack - 1)
    ])

    ## keypoint heatmaps
    for head in heads.keys():
        if 'hm' in head:
            module = nn.ModuleList([
                make_heat_layer(cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
            ])
            self.__setattr__(head, module)
            for heat in self.__getattr__(head):
                heat[-1].bias.data.fill_(-2.19)
        else:
            module = nn.ModuleList([
                make_regr_layer(cnv_dim, curr_dim, heads[head]) for _ in range(nstack)
            ])
            self.__setattr__(head, module)

    self.relu = nn.ReLU(inplace=True)
def convbn_relu(in_planes, out_planes, kernel_size, stride, pad, dilation):
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride,
                  padding=dilation if dilation > 1 else pad,
                  dilation=dilation, bias=False),
        nn.BatchNorm2d(out_planes),
        nn.ReLU(inplace=True))
def __init__(self, img_latent_dim, aud_latent_dim):
    super(Generator, self).__init__()
    self.img_latent_dim = img_latent_dim
    self.aud_latent_dim = aud_latent_dim
    self.ndf = 64
    self.ngf = 64

    self.img_enc_l1 = nn.Sequential(
        # input is (nc) x 64 x 64
        nn.Conv2d(3, self.ndf, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ndf),
        nn.LeakyReLU(0.2, inplace=True)
    )
    self.img_enc_l2 = nn.Sequential(
        # state size. (ndf) x 32 x 32
        nn.Conv2d(self.ndf, self.ndf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ndf * 2),
        nn.LeakyReLU(0.2, inplace=True)
    )
    self.img_enc_l3 = nn.Sequential(
        # state size. (ndf*2) x 16 x 16
        nn.Conv2d(self.ndf * 2, self.ndf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ndf * 2),
        nn.LeakyReLU(0.2, inplace=True)
    )
    self.img_enc_l4 = nn.Sequential(
        # state size. (ndf*4) x 8 x 8
        nn.Conv2d(self.ndf * 2, self.ndf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ndf * 2),
        nn.LeakyReLU(0.2, inplace=True),
    )
    self.img_enc_l5 = nn.Conv2d(self.ndf * 2, self.img_latent_dim, 4, 1, 0, bias=False)

    self.audio_encoder = nn.Sequential(
        nn.Conv2d(1, self.ndf // 2, 1, 1, 0, bias=False),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(self.ndf // 2, self.ndf * 2, 1, 1, 0, bias=False),
        nn.BatchNorm2d(self.ndf * 2),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(self.ndf * 2, self.ndf, 1, 1, 0, bias=False),
        nn.BatchNorm2d(self.ndf),
        nn.LeakyReLU(0.2, inplace=True),
        nn.Conv2d(self.ndf, self.ndf, 1, 1, 0, bias=False),
    )
    self.audio_linear = nn.Linear(self.ndf * 60 * 3, self.aud_latent_dim)

    self.lstm_hiddend_dim = self.aud_latent_dim * 2
    self.encoded_dim = self.img_latent_dim + self.lstm_hiddend_dim + 100

    self.decoder_l1 = nn.Sequential(
        # input is Z, going into a convolution
        nn.ConvTranspose2d(self.encoded_dim, self.ngf * 2, 4, 1, 0, bias=False),
        nn.BatchNorm2d(self.ngf * 2),
        nn.ReLU(True)
    )
    self.decoder_l2 = nn.Sequential(
        # state size. (ngf*8) x 4 x 4
        nn.ConvTranspose2d(self.ngf * 4, self.ngf * 4, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ngf * 4),
        nn.ReLU(True)
    )
    self.decoder_l3 = nn.Sequential(
        # state size. (ngf*4) x 8 x 8
        nn.ConvTranspose2d(self.ngf * 6, self.ngf * 2, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ngf * 2),
        nn.ReLU(True)
    )
    self.decoder_l4 = nn.Sequential(
        # state size. (ngf*2) x 16 x 16
        nn.ConvTranspose2d(self.ngf * 4, self.ngf, 4, 2, 1, bias=False),
        nn.BatchNorm2d(self.ngf),
        nn.ReLU(True)
    )
    self.decoder_l5 = nn.Sequential(
        # state size. (ngf) x 32 x 32
        nn.ConvTranspose2d(self.ngf * 2, 3, 4, 2, 1, bias=False),
        nn.Tanh()
        # state size. (nc) x 64 x 64
    )

    self.lstm = nn.LSTM(self.aud_latent_dim, self.lstm_hiddend_dim, 1, batch_first=True)
    self.noise_lstm = nn.LSTM(100, 100, 1, batch_first=True)
def conv_bn_leru(in_channels, out_channels, kernel_size=3, stride=1, padding=1):
    return nn.Sequential(
        nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True)
    )
def conv_1x1_bn(inp, oup):
    return nn.Sequential(
        nn.Conv2d(inp, oup, 1, 1, 0, bias=False),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True)
    )
def __init__(self, block, layers, num_classes=1000):
    self.inplanes = 64
    super(ResNet, self).__init__()
    self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    self.row_pool = nn.AdaptiveAvgPool2d((4, 1))
    self.conv_feature = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
    self.drop = nn.Dropout(0.75)

    # use conv to replace pool
    self.conv_pool1 = nn.Conv2d(2048, 2048, kernel_size=1, stride=1, padding=0)
    self.conv_pool2 = nn.Conv2d(2048, 2048, kernel_size=1, stride=1, padding=0)
    self.conv_pool3 = nn.Conv2d(2048, 2048, kernel_size=1, stride=1, padding=0)
    self.conv_pool4 = nn.Conv2d(2048, 2048, kernel_size=1, stride=1, padding=0)
    self.global_pool = nn.AdaptiveAvgPool2d((1, 1))

    # classification part
    self.fc_cls1 = nn.Linear(2048, num_classes)
    self.fc_cls2 = nn.Linear(2048, num_classes)
    self.fc_cls3 = nn.Linear(2048, num_classes)
    self.fc_cls4 = nn.Linear(2048, num_classes)

    self.conv_bn1 = nn.BatchNorm2d(2048)
    self.conv_bn2 = nn.BatchNorm2d(2048)
    self.conv_bn3 = nn.BatchNorm2d(2048)
    self.conv_bn4 = nn.BatchNorm2d(2048)

    # initialization
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()