def __init__(self,
             output_blocks=(DEFAULT_BLOCK_INDEX,),
             resize_input=True,
             normalize_input=True,
             requires_grad=False):
    """Build pretrained InceptionV3

    Parameters
    ----------
    output_blocks : iterable of int
        Indices of blocks to return features of. Possible values are:
            - 0: corresponds to output of first max pooling
            - 1: corresponds to output of second max pooling
            - 2: corresponds to output which is fed to aux classifier
            - 3: corresponds to output of final average pooling
        The default is an immutable tuple so that no list object is
        shared between calls.
    resize_input : bool
        If true, bilinearly resizes input to width and height 299 before
        feeding input to model. As the network without fully connected
        layers is fully convolutional, it should be able to handle inputs
        of arbitrary size, so resizing might not be strictly needed
    normalize_input : bool
        If true, normalizes the input to the statistics the pretrained
        Inception network expects
    requires_grad : bool
        If true, parameters of the model require gradient. Possibly useful
        for finetuning the network

    Raises
    ------
    ValueError
        If ``output_blocks`` is empty.
    AssertionError
        If the largest requested block index is greater than 3.
    """
    super(InceptionV3, self).__init__()

    self.resize_input = resize_input
    self.normalize_input = normalize_input

    # Materialise the argument once: this also supports one-shot iterables,
    # which the previous sorted(...) followed by max(...) would exhaust.
    self.output_blocks = sorted(output_blocks)
    if not self.output_blocks:
        raise ValueError(
            'output_blocks must contain at least one block index')
    # The list is sorted, so its last element is the maximum.
    self.last_needed_block = self.output_blocks[-1]

    assert self.last_needed_block <= 3, \
        'Last possible output block index is 3'

    self.blocks = nn.ModuleList()

    inception = models.inception_v3(pretrained=True)

    # Block 0: input to maxpool1
    block0 = [
        inception.Conv2d_1a_3x3,
        inception.Conv2d_2a_3x3,
        inception.Conv2d_2b_3x3,
        nn.MaxPool2d(kernel_size=3, stride=2),
    ]
    self.blocks.append(nn.Sequential(*block0))

    # Block 1: maxpool1 to maxpool2
    if self.last_needed_block >= 1:
        block1 = [
            inception.Conv2d_3b_1x1,
            inception.Conv2d_4a_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.blocks.append(nn.Sequential(*block1))

    # Block 2: maxpool2 to aux classifier
    if self.last_needed_block >= 2:
        block2 = [
            inception.Mixed_5b,
            inception.Mixed_5c,
            inception.Mixed_5d,
            inception.Mixed_6a,
            inception.Mixed_6b,
            inception.Mixed_6c,
            inception.Mixed_6d,
            inception.Mixed_6e,
        ]
        self.blocks.append(nn.Sequential(*block2))

    # Block 3: aux classifier to final avgpool
    if self.last_needed_block >= 3:
        block3 = [
            inception.Mixed_7a,
            inception.Mixed_7b,
            inception.Mixed_7c,
            nn.AdaptiveAvgPool2d(output_size=(1, 1)),
        ]
        self.blocks.append(nn.Sequential(*block3))

    # Freeze or unfreeze every parameter of the retained blocks.
    for param in self.parameters():
        param.requires_grad = requires_grad
def __init__(self):
    """Assemble the conv -> max-pool -> flatten -> linear test network."""
    super(TestNet, self).__init__()

    layers = [
        nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1),
        nn.MaxPool2d(2, 2),
        Flatten(),
        # NOTE(review): the convolution above emits 16 channels while this
        # layer is sized for 1 * 14 * 14 inputs -- confirm against the
        # Flatten helper and the input size the tests actually use.
        nn.Linear(1 * 14 * 14, 10),
    ]
    self.net = nn.Sequential(*layers)
def __init__(self, in_channels, out_channels):
    """Create the downscaling stage: 2x2 max pooling, then ``DoubleConv``.

    Args:
        in_channels: forwarded to ``DoubleConv`` as its first argument.
        out_channels: forwarded to ``DoubleConv`` as its second argument.
    """
    super().__init__()

    pool = nn.MaxPool2d(2)
    convs = DoubleConv(in_channels, out_channels)
    self.maxpool_conv = nn.Sequential(pool, convs)
def __init__(self, in_dim, out_dim, args, mean_std=None):
    """Set up the LFCC front end, the convolutional sub-models and heads.

    Args:
        in_dim: passed to ``self.prepare_mean_std``.
        out_dim: passed to ``self.prepare_mean_std``.
        args: passed to ``self.prepare_mean_std``.
        mean_std: passed to ``self.prepare_mean_std`` (default ``None``).

    The protocol file path is read from ``prj_conf.optional_argument[0]``
    and the source sampling rate from ``prj_conf.wav_samp_rate``.
    """
    super(Model, self).__init__()

    ##### required part, no need to change #####
    # Mean / std of input and output, stored as non-trainable parameters.
    in_m, in_s, out_m, out_s = self.prepare_mean_std(
        in_dim, out_dim, args, mean_std)
    self.input_mean = torch_nn.Parameter(in_m, requires_grad=False)
    self.input_std = torch_nn.Parameter(in_s, requires_grad=False)
    self.output_mean = torch_nn.Parameter(out_m, requires_grad=False)
    self.output_std = torch_nn.Parameter(out_s, requires_grad=False)

    # Debugging / validation flags, both off by default.
    self.model_debug = False
    self.validation = False
    #####

    # Target data: parse the protocol file named in the project config.
    protocol_file = prj_conf.optional_argument[0]
    self.protocol_parser = protocol_parse(protocol_file)

    # Working sampling rate; torchaudio converts from the configured rate.
    self.m_target_sr = 16000
    # Re-sampling (optional)
    self.m_resampler = torchaudio.transforms.Resample(
        prj_conf.wav_samp_rate, self.m_target_sr)
    # VAD (optional)
    self.m_vad = torchaudio.transforms.Vad(sample_rate=self.m_target_sr)

    # Flag for balanced class (temporary use)
    self.v_flag = 1

    # One entry per sub-model in each of the lists below.
    self.frame_hops = [160]   # frame shift (number of points)
    self.frame_lens = [320]   # frame length
    self.fft_n = [512]        # FFT length
    self.lfcc_dim = [20]      # LFCC dim (base component)
    self.lfcc_with_delta = True

    # Window type
    self.win = torch.hann_window
    # Floor used when computing the log spectral amplitude
    self.amp_floor = 0.00001

    # Truncation length per sub-model: 10 * 16 * 750 // frame shift.
    self.v_truncate_lens = [10 * 16 * 750 // x for x in self.frame_hops]

    # Number of sub-models
    self.v_submodels = len(self.frame_lens)
    # Dimension of embedding vectors
    self.v_emd_dim = 64
    # Number of output classes
    self.v_out_class = 2

    transforms = []
    output_heads = []
    frontends = []
    angle_layers = []

    settings = zip(self.v_truncate_lens, self.fft_n, self.lfcc_dim)
    for idx, (trunc_len, _fft_n, feat_dim) in enumerate(settings):
        # With deltas enabled the feature dimension is tripled.
        if self.lfcc_with_delta:
            feat_dim = feat_dim * 3

        # Convolutional trunk. Layers are created in this exact order so
        # parameter registration and random initialisation are unchanged.
        trunk = torch_nn.Sequential(
            torch_nn.Conv2d(1, 64, [5, 5], 1, padding=[2, 2]),
            nii_nn.MaxFeatureMap2D(),
            torch.nn.MaxPool2d([2, 2], [2, 2]),

            torch_nn.Conv2d(32, 64, [1, 1], 1, padding=[0, 0]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(32, affine=False),
            torch_nn.Conv2d(32, 96, [3, 3], 1, padding=[1, 1]),
            nii_nn.MaxFeatureMap2D(),

            torch.nn.MaxPool2d([2, 2], [2, 2]),
            torch_nn.BatchNorm2d(48, affine=False),

            torch_nn.Conv2d(48, 96, [1, 1], 1, padding=[0, 0]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(48, affine=False),
            torch_nn.Conv2d(48, 128, [3, 3], 1, padding=[1, 1]),
            nii_nn.MaxFeatureMap2D(),

            torch.nn.MaxPool2d([2, 2], [2, 2]),

            torch_nn.Conv2d(64, 128, [1, 1], 1, padding=[0, 0]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(64, affine=False),
            torch_nn.Conv2d(64, 64, [3, 3], 1, padding=[1, 1]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(32, affine=False),

            torch_nn.Conv2d(32, 64, [1, 1], 1, padding=[0, 0]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.BatchNorm2d(32, affine=False),
            torch_nn.Conv2d(32, 64, [3, 3], 1, padding=[1, 1]),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.MaxPool2d([2, 2], [2, 2]),
        )
        transforms.append(trunk)

        # Four 2x2 poolings above, hence the // 16 on both axes.
        flat_dim = (trunc_len // 16) * (feat_dim // 16) * 32
        head = torch_nn.Sequential(
            torch_nn.Dropout(0.7),
            torch_nn.Linear(flat_dim, 160),
            nii_nn.MaxFeatureMap2D(),
            torch_nn.Linear(80, self.v_emd_dim),
        )
        output_heads.append(head)

        frontends.append(
            nii_front_end.LFCC(self.frame_lens[idx],
                               self.frame_hops[idx],
                               self.fft_n[idx],
                               self.m_target_sr,
                               self.lfcc_dim[idx],
                               with_energy=True))

        angle_layers.append(
            nii_p2sgrad.P2SActivationLayer(self.v_emd_dim,
                                           self.v_out_class))

    # Register the per-sub-model components as module lists.
    self.m_transform = torch_nn.ModuleList(transforms)
    self.m_output_act = torch_nn.ModuleList(output_heads)
    self.m_frontend = torch_nn.ModuleList(frontends)
    self.m_angle = torch_nn.ModuleList(angle_layers)
def __init__(self, block, layers, groups, reduction, dropout_p=0.2,
             inplanes=128, input_3x3=True, downsample_kernel_size=3,
             downsample_padding=1, num_classes=1000, last_stride=2):
    """Construct the SENet stem, four residual stages and classifier head.

    Parameters
    ----------
    block (nn.Module): Bottleneck class.
        - For SENet154: SEBottleneck
        - For SE-ResNet models: SEResNetBottleneck
        - For SE-ResNeXt models: SEResNeXtBottleneck
    layers (list of ints): Number of residual blocks for 4 layers of the
        network (layer1...layer4).
    groups (int): Number of groups for the 3x3 convolution in each
        bottleneck block.
        - For SENet154: 64
        - For SE-ResNet models: 1
        - For SE-ResNeXt models: 32
    reduction (int): Reduction ratio for Squeeze-and-Excitation modules.
        - For all models: 16
    dropout_p (float or None): Drop probability for the Dropout layer.
        If `None` the Dropout layer is not used.
        - For SENet154: 0.2
        - For SE-ResNet models: None
        - For SE-ResNeXt models: None
    inplanes (int): Number of input channels for layer1.
        - For SENet154: 128
        - For SE-ResNet models: 64
        - For SE-ResNeXt models: 64
    input_3x3 (bool): If `True`, use three 3x3 convolutions instead of
        a single 7x7 convolution in layer0.
        - For SENet154: True
        - For SE-ResNet models: False
        - For SE-ResNeXt models: False
    downsample_kernel_size (int): Kernel size for downsampling
        convolutions in layer2, layer3 and layer4.
        - For SENet154: 3
        - For SE-ResNet models: 1
        - For SE-ResNeXt models: 1
    downsample_padding (int): Padding for downsampling convolutions in
        layer2, layer3 and layer4.
        - For SENet154: 1
        - For SE-ResNet models: 0
        - For SE-ResNeXt models: 0
    num_classes (int): Number of outputs in `last_linear` layer.
        - For all models: 1000
    last_stride (int): Stride passed to `_make_layer` for layer4.
    """
    super(SENet, self).__init__()
    self.inplanes = inplanes

    # layer0: the input stem, in one of two variants.
    if input_3x3:
        stem = [
            ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1,
                                bias=False)),
            ('bn1', nn.BatchNorm2d(64)),
            ('relu1', nn.ReLU(inplace=True)),
            ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
                                bias=False)),
            ('bn2', nn.BatchNorm2d(64)),
            ('relu2', nn.ReLU(inplace=True)),
            ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
                                bias=False)),
            ('bn3', nn.BatchNorm2d(inplanes)),
            ('relu3', nn.ReLU(inplace=True)),
        ]
    else:
        stem = [
            ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                padding=3, bias=False)),
            ('bn1', nn.BatchNorm2d(inplanes)),
            ('relu1', nn.ReLU(inplace=True)),
        ]
    # To preserve compatibility with Caffe weights `ceil_mode=True`
    # is used instead of `padding=1`.
    stem.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)))
    self.layer0 = nn.Sequential(OrderedDict(stem))

    # layer1 always uses a 1x1, unpadded downsample convolution.
    self.layer1 = self._make_layer(
        block,
        planes=64,
        blocks=layers[0],
        groups=groups,
        reduction=reduction,
        downsample_kernel_size=1,
        downsample_padding=0)

    # layer2..layer4 share the configurable downsample settings.
    shared = dict(
        groups=groups,
        reduction=reduction,
        downsample_kernel_size=downsample_kernel_size,
        downsample_padding=downsample_padding)
    self.layer2 = self._make_layer(
        block, planes=128, blocks=layers[1], stride=2, **shared)
    self.layer3 = self._make_layer(
        block, planes=256, blocks=layers[2], stride=2, **shared)
    self.layer4 = self._make_layer(
        block, planes=512, blocks=layers[3], stride=last_stride, **shared)

    self.avg_pool = nn.AvgPool2d(7, stride=1)
    if dropout_p is None:
        self.dropout = None
    else:
        self.dropout = nn.Dropout(dropout_p)
    self.last_linear = nn.Linear(512 * block.expansion, num_classes)
def __init__(self, in_ch, out_ch):
    """Build the down-sampling step: 2x2 max pooling, then ``double_conv``.

    Args:
        in_ch: first argument forwarded to ``double_conv``.
        out_ch: second argument forwarded to ``double_conv``.
    """
    super(down, self).__init__()

    stages = [nn.MaxPool2d(2), double_conv(in_ch, out_ch)]
    self.mpconv = nn.Sequential(*stages)
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
             groups=1, width_per_group=64, replace_stride_with_dilation=None,
             norm_layer=None):
    """Build the ResNet stem, four residual stages and the classifier.

    Args:
        block: residual block class; ``block.expansion`` sizes the final
            linear layer.
        layers: number of blocks for each of the four stages.
        num_classes: output size of the final linear layer.
        zero_init_residual: if true, zero the weight of the last norm
            layer of every ``Bottleneck`` / ``BasicBlock``.
        groups: stored as ``self.groups``.
        width_per_group: stored as ``self.base_width``.
        replace_stride_with_dilation: ``None`` or three booleans, one per
            stage 2-4, passed to ``_make_layer`` as ``dilate``.
        norm_layer: normalisation layer factory; defaults to
            ``nn.BatchNorm2d``.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have
            exactly three elements.
    """
    super(ResNet, self).__init__()

    self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
    norm_layer = self._norm_layer

    self.inplanes = 64  # number of filters
    self.dilation = 1

    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
    dilate2, dilate3, dilate4 = (replace_stride_with_dilation[0],
                                 replace_stride_with_dilation[1],
                                 replace_stride_with_dilation[2])

    self.groups = groups
    self.base_width = width_per_group

    # Shape notes below assume a 3x64x64 input.
    # conv1: floor((64 + 2*3 - 7) / 2) + 1 = 32  -> 64x32x32
    self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2,
                           padding=3, bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    # maxpool: floor((32 + 2*1 - 3) / 2) + 1 = 16  -> 64x16x16
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages; block counts come from `layers`
    # (e.g. 3, 4, 6, 3 per the original inline notes).
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                   dilate=dilate2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                   dilate=dilate3)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                   dilate=dilate4)

    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # Weight initialisation for convolutions and normalisation layers.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                    nonlinearity='relu')
            continue
        if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)

    # Zero-initialize the last BN in each residual branch,
    # so that the residual branch starts with zeros, and each residual
    # block behaves like an identity.
    # This improves the model by 0.2~0.3% according to
    # https://arxiv.org/abs/1706.02677
    if not zero_init_residual:
        return
    for module in self.modules():
        if isinstance(module, Bottleneck):
            nn.init.constant_(module.bn3.weight, 0)
        elif isinstance(module, BasicBlock):
            nn.init.constant_(module.bn2.weight, 0)
def __init__(self, config_channels, prefix, bn=True, ratio=1):
    """Build the three parallel branches of this reduction block.

    Args:
        config_channels: callable object with a ``channels`` attribute; it
            is called as ``config_channels(width, weight_key)`` to obtain
            each convolution's output width.
        prefix: prefix used when formatting the per-layer weight keys.
        bn: forwarded to every ``Conv2d`` as ``bn``.
        ratio: multiplier applied to the nominal branch widths.
    """
    nn.Module.__init__(self)

    def add_conv(layers, width, key_format, **conv_kwargs):
        # Read the current channel count before invoking config_channels,
        # matching left-to-right argument evaluation in a direct call
        # (presumably config_channels(...) updates `.channels` -- verify).
        in_channels = config_channels.channels
        out_channels = config_channels(
            int(width * ratio), key_format % (prefix, len(layers)))
        layers.append(
            Conv2d(in_channels, out_channels, bn=bn, **conv_kwargs))

    # Remember the block's input width; every branch starts from it.
    channels = config_channels.channels

    # branch0
    branch0_key = '%s.branch0.%d.conv.weight'
    layers = []
    add_conv(layers, 192, branch0_key, kernel_size=1, stride=1)
    add_conv(layers, 192, branch0_key, kernel_size=3, stride=2)
    self.branch0 = nn.Sequential(*layers)

    # branch1 (restart from the block's input width)
    config_channels.channels = channels
    branch1_key = '%s.branch1.%d.conv.weight'
    layers = []
    add_conv(layers, 256, branch1_key, kernel_size=1, stride=1)
    add_conv(layers, 256, branch1_key,
             kernel_size=(1, 7), stride=1, padding=(0, 3))
    add_conv(layers, 320, branch1_key,
             kernel_size=(7, 1), stride=1, padding=(3, 0))
    add_conv(layers, 320, branch1_key, kernel_size=3, stride=2)
    self.branch1 = nn.Sequential(*layers)

    # branch2: parameter-free pooling path
    self.branch2 = nn.MaxPool2d(3, stride=2)

    # output: widths of both conv branches plus the input width
    branch0_out = self.branch0[-1].conv.weight.size(0)
    branch1_out = self.branch1[-1].conv.weight.size(0)
    config_channels.channels = branch0_out + branch1_out + channels
def __init__(self, in_size, out_size):
    """Create two ``conv2DBatchNormRelu`` layers and an index-returning pool.

    Args:
        in_size: input size of the first convolution unit.
        out_size: output size of both convolution units.
    """
    super(segnetDown2, self).__init__()

    # Shared trailing arguments for both units
    # (presumably kernel size, stride, padding -- confirm in helper).
    conv_args = (3, 1, 1)
    self.conv1 = conv2DBatchNormRelu(in_size, out_size, *conv_args)
    self.conv2 = conv2DBatchNormRelu(out_size, out_size, *conv_args)

    # return_indices=True makes the pool also return the argmax indices.
    self.maxpool_with_argmax = nn.MaxPool2d(2, 2, return_indices=True)
def __init__(self, in_channels=1, out_channels=1):
    """Initializes U-Net.

    Args:
        in_channels: channels of the input; also added to the input width
            of the last decoder block (``96 + in_channels``).
        out_channels: channels produced by the final convolution.
    """
    super(UNet, self).__init__()

    def conv3x3(c_in, c_out):
        # 3x3 convolution with stride 1 and padding 1.
        return nn.Conv2d(c_in, c_out, 3, stride=1, padding=1)

    def relu():
        return nn.ReLU(inplace=True)

    def upsample(channels):
        # Transposed convolution used for the up-sampling steps.
        return nn.ConvTranspose2d(channels, channels, 3, stride=2,
                                  padding=1, output_padding=1)

    # Layers: enc_conv0, enc_conv1, pool1
    self._block1 = nn.Sequential(
        conv3x3(in_channels, 48), relu(),
        conv3x3(48, 48), relu(),
        nn.MaxPool2d(2))

    # Layers: enc_conv(i), pool(i); i=2..5
    self._block2 = nn.Sequential(
        conv3x3(48, 48), relu(),
        nn.MaxPool2d(2))

    # Layers: enc_conv6, upsample5
    self._block3 = nn.Sequential(
        conv3x3(48, 48), relu(),
        upsample(48))

    # Layers: dec_conv5a, dec_conv5b, upsample4
    self._block4 = nn.Sequential(
        conv3x3(96, 96), relu(),
        conv3x3(96, 96), relu(),
        upsample(96))

    # Layers: dec_deconv(i)a, dec_deconv(i)b, upsample(i-1); i=4..2
    self._block5 = nn.Sequential(
        conv3x3(144, 96), relu(),
        conv3x3(96, 96), relu(),
        upsample(96))

    # Layers: dec_conv1a, dec_conv1b, dec_conv1c, followed by Tanh
    self._block6 = nn.Sequential(
        conv3x3(96 + in_channels, 64), relu(),
        conv3x3(64, 32), relu(),
        conv3x3(32, out_channels),
        nn.Tanh())

    # Initialize weights
    self._init_weights()
def __init__(self, num_filters, channels_in, stride):
    """Prepare a strided identity mapping and the channel difference.

    Args:
        num_filters: target channel count.
        channels_in: incoming channel count.
        stride: stride of the identity pooling.
    """
    super(IdentityExpansion, self).__init__()

    # Difference between target and incoming channels
    # (presumably the number of zero channels to add -- see forward).
    self.num_zeros = num_filters - channels_in

    # With kernel_size=1, max pooling is equivalent to an identity
    # mapping with stride.
    self.identity = nn.MaxPool2d(1, stride=stride)
def __init__(
    self,
    block: Type[Union[MDL_BasicBlock]],
    layers: List[int],
    in_channels = 12,
    num_classes: int = 1000,
    zero_init_residual: bool = False,
    groups: int = 1,
    width_per_group: int = 64,
    replace_stride_with_dilation: Optional[List[bool]] = None,
    norm_layer: Optional[Callable[..., nn.Module]] = None,
    **kwargs
) -> None:
    """Build the ResNet-style network with per-stage projection flags.

    Args:
        block: residual block class; ``block.expansion`` sizes ``fc``.
        layers: number of blocks for each of the four stages.
        in_channels: input channels of the first convolution.
        num_classes: output size of the final linear layer.
        zero_init_residual: if true, zero ``bn2.weight`` of every
            ``MDL_BasicBlock``.
        groups: stored as ``self.groups``.
        width_per_group: stored as ``self.base_width``.
        replace_stride_with_dilation: ``None`` or three booleans passed to
            ``_make_layer`` as ``dilate`` for stages 2-4.
        norm_layer: normalisation layer factory; defaults to
            ``nn.BatchNorm2d``.
        **kwargs: only ``project_mode`` is read (default ``'1111'``); its
            four entries are passed as ``is_proj`` to the stem/stage 1 and
            stages 2-4.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have
            exactly three elements.
    """
    super(MDL_ResNet, self).__init__()

    self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
    norm_layer = self._norm_layer

    self.inplanes = 64
    self.dilation = 1
    self.project_mode = kwargs.get('project_mode', '1111')

    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(replace_stride_with_dilation))

    self.groups = groups
    self.base_width = width_per_group

    # Stem. The first projection flag is shared by conv1 and layer1.
    # NOTE(review): `pedding` looks like a misspelling of `padding`; it is
    # kept verbatim because conv_task's signature is not visible here --
    # confirm against its definition.
    stem_proj = self.project_mode[0]
    self.conv1 = conv_task(in_channels, self.inplanes, kernel_size=7,
                           stride=2, pedding=3, is_proj=stem_proj)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages.
    self.layer1 = self._make_layer(
        block, 64, layers[0],
        is_proj=self.project_mode[0])
    self.layer2 = self._make_layer(
        block, 128, layers[1], stride=2,
        dilate=replace_stride_with_dilation[0],
        is_proj=self.project_mode[1])
    self.layer3 = self._make_layer(
        block, 256, layers[2], stride=2,
        dilate=replace_stride_with_dilation[1],
        is_proj=self.project_mode[2])
    self.layer4 = self._make_layer(
        block, 512, layers[3], stride=2,
        dilate=replace_stride_with_dilation[2],
        is_proj=self.project_mode[3])

    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # Weight initialisation for convolutions and normalisation layers.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                    nonlinearity='relu')
            continue
        if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)

    # Zero-initialize the last BN in each residual branch,
    # so that the residual branch starts with zeros, and each residual
    # block behaves like an identity.
    # This improves the model by 0.2~0.3% according to
    # https://arxiv.org/abs/1706.02677
    if not zero_init_residual:
        return
    for module in self.modules():
        if isinstance(module, MDL_BasicBlock):
            nn.init.constant_(module.bn2.weight, 0)  # type: ignore[arg-type]
def __init__(self, levels, block, in_channels, out_channels, stride=1,
             level_root=False, root_dim=0, root_kernel_size=1,
             dilation=1, root_residual=False):
    """Recursively build a two-child tree of blocks.

    Args:
        levels: tree depth; at depth 1 the children are ``block``
            instances and a ``Root`` is created, otherwise the children
            are nested ``Tree`` instances.
        block: block class used for the leaves.
        in_channels: input channels of the first child.
        out_channels: output channels of both children.
        stride: stride of the first child; values above 1 also create a
            max-pool ``downsample``.
        level_root: if true, ``in_channels`` is added to ``root_dim``.
        root_dim: input width of the root; 0 means ``2 * out_channels``.
        root_kernel_size: forwarded to ``Root`` / nested trees.
        dilation: forwarded to the blocks / nested trees.
        root_residual: forwarded to ``Root`` / nested trees.
    """
    super(Tree, self).__init__()

    if root_dim == 0:
        root_dim = 2 * out_channels
    if level_root:
        root_dim += in_channels

    if levels == 1:
        # Leaf level: two plain blocks joined by a root node.
        self.tree1 = block(in_channels, out_channels, stride,
                           dilation=dilation)
        self.tree2 = block(out_channels, out_channels, 1,
                           dilation=dilation)
        self.root = Root(root_dim, out_channels, root_kernel_size,
                         root_residual)
    else:
        # Inner level: recurse; only the second subtree receives the
        # accumulated root width.
        shared = dict(root_kernel_size=root_kernel_size,
                      dilation=dilation,
                      root_residual=root_residual)
        self.tree1 = Tree(levels - 1, block, in_channels, out_channels,
                          stride, root_dim=0, **shared)
        self.tree2 = Tree(levels - 1, block, out_channels, out_channels,
                          root_dim=root_dim + out_channels, **shared)

    self.level_root = level_root
    self.root_dim = root_dim
    self.levels = levels

    # Optional spatial down-sampling, present only for stride > 1.
    self.downsample = (nn.MaxPool2d(stride, stride=stride)
                       if stride > 1 else None)

    # Optional 1x1 projection, present only when the widths differ.
    if in_channels != out_channels:
        self.project = nn.Sequential(
            nn.Conv2d(in_channels, out_channels,
                      kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(out_channels, momentum=BN_MOMENTUM))
    else:
        self.project = None
def __init__(self, num_classes, loss, block_rgb, layers_rgb, block_contour,
             layers_contour, zero_init_residual=False, groups=1,
             width_per_group=64, replace_stride_with_dilation=None,
             norm_layer=None, last_stride=2, fc_dims=None, dropout_p=None,
             part_num=3, part_weight=1.0, **kwargs):
    """Build the appearance (RGB) and contour backbones plus their heads.

    Args:
        num_classes: output size of both classifiers.
        loss: stored as ``self.loss``.
        block_rgb: residual block class of the RGB backbone.
        layers_rgb: block counts for the four RGB stages.
        block_contour: residual block class of the contour backbone.
        layers_contour: block counts for the four contour stages.
        zero_init_residual: if true, zero the last norm weight of every
            ``Bottleneck`` / ``BasicBlock``.
        groups: stored as ``self.groups``.
        width_per_group: stored as ``self.base_width``.
        replace_stride_with_dilation: ``None`` or three booleans passed as
            ``dilate`` to RGB stages 2-4.
        norm_layer: normalisation layer factory for the RGB stem; defaults
            to ``nn.BatchNorm2d``.
        last_stride: stride of the last stage in both backbones.
        fc_dims: if not ``None``, passed to ``_construct_fc_layer``.
        dropout_p: passed to ``_construct_fc_layer``.
        part_num: number of horizontal parts used by the part pools,
            part BN-necks and (plus one) the graph convolutions.
        part_weight: stored as ``self.part_weight``.
        **kwargs: accepted and ignored here.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have
            exactly three elements.
    """
    super(MyModel, self).__init__()
    self.cnt = 0

    self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
    norm_layer = self._norm_layer

    self.loss = loss
    self.feature_dim_base = 512
    self.feature_dim = self.feature_dim_base * block_rgb.expansion
    self.inplanes = 64
    self.dilation = 1
    self.part_num = part_num
    self.part_weight = part_weight
    self.reduced_dim = 256

    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        replace_stride_with_dilation = [False, False, False]
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(
                             replace_stride_with_dilation))

    self.groups = groups
    self.base_width = width_per_group

    # ---- Backbone network for appearance feature extraction ----
    self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2,
                           padding=3, bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block_rgb, 64, layers_rgb[0])
    self.layer2 = self._make_layer(
        block_rgb, 128, layers_rgb[1], stride=2,
        dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(
        block_rgb, 256, layers_rgb[2], stride=2,
        dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(
        block_rgb, self.feature_dim_base, layers_rgb[3],
        stride=last_stride,
        dilate=replace_stride_with_dilation[2])
    # Reset to the layer3 output width (a second layer4 branch that used
    # this value is disabled in the original source).
    self.inplanes = 256 * block_rgb.expansion

    self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.parts_avgpool = nn.AdaptiveAvgPool2d((self.part_num, 1))
    self.conv5 = DimReduceLayer(
        self.feature_dim_base * block_rgb.expansion,
        self.reduced_dim, nonlinear='relu')

    # Optional fully connected layers
    if fc_dims is None:
        self.fc = None
    else:
        self.fc = self._construct_fc_layer(
            fc_dims, 512 * block_rgb.expansion, dropout_p)

    # ---- Backbone network for contour feature extraction ----
    self.inplanes = 64
    self.conv1_contour = nn.Conv2d(1, 64, kernel_size=7, stride=2,
                                   padding=3, bias=False)
    self.bn1_contour = nn.BatchNorm2d(64)
    self.layer1_contour = self._make_layer(
        block_contour, 64, layers_contour[0])
    self.layer2_contour = self._make_layer(
        block_contour, 128, layers_contour[1], stride=2)
    self.layer3_contour = self._make_layer(
        block_contour, 256, layers_contour[2], stride=2)
    self.layer4_contour = self._make_layer(
        block_contour, self.feature_dim_base, layers_contour[3],
        stride=last_stride)
    self.conv5_contour = DimReduceLayer(
        self.feature_dim_base * block_contour.expansion,
        self.reduced_dim, nonlinear='relu')

    # ---- Sub-networks for contour graph modeling ----
    self.parts_avgpool_contour = nn.AdaptiveAvgPool2d((self.part_num, 3))
    self.feature_dim_gnn = self.feature_dim_base * block_contour.expansion
    graph_layers = [
        GraphConvolution(self.feature_dim_gnn, self.feature_dim_gnn,
                         bias=True)
        for _ in range(self.part_num + 1)
    ]
    self.gnns = nn.ModuleList(graph_layers)

    # ---- BN-neck layers ----
    self.bnneck_rgb = nn.BatchNorm1d(self.feature_dim)
    self.bnneck_rgb_part = nn.ModuleList(
        [nn.BatchNorm1d(self.reduced_dim) for _ in range(self.part_num)])
    self.bnneck_contour = nn.BatchNorm1d(
        self.feature_dim_base * block_contour.expansion)
    self.bnneck_contour_part = nn.ModuleList(
        [nn.BatchNorm1d(self.reduced_dim) for _ in range(self.part_num)])

    # ---- Classifiers ----
    self.classifier = nn.Linear(self.feature_dim, num_classes, bias=False)
    self.classifier_contour = nn.Linear(
        self.feature_dim_base * block_contour.expansion,
        num_classes, bias=False)

    self._init_params()

    # Zero-initialize the last BN in each residual branch,
    # so that the residual branch starts with zeros, and each residual
    # block behaves like an identity.
    # This improves the model by 0.2~0.3% according to
    # https://arxiv.org/abs/1706.02677
    if not zero_init_residual:
        return
    for module in self.modules():
        if isinstance(module, Bottleneck):
            nn.init.constant_(module.bn3.weight, 0)
        elif isinstance(module, BasicBlock):
            nn.init.constant_(module.bn2.weight, 0)
def __init__(self,
             groups=3,
             widen_factor=1.0,
             out_indices=(2, ),
             frozen_stages=-1,
             conv_cfg=None,
             norm_cfg=dict(type='BN'),
             act_cfg=dict(type='ReLU'),
             norm_eval=False,
             with_cp=False,
             init_cfg=None):
    """Validate the configuration and build the stem and three stages.

    Args:
        groups: number of groups; one of 1, 2, 3, 4 or 8. Selects the
            base channel widths of the three stages.
        widen_factor: multiplier applied to the stem and stage widths.
        out_indices: stage indices, each required to be in range(0, 3).
        frozen_stages: required to be in range(-1, 3).
        conv_cfg: forwarded to the stem ``ConvModule``.
        norm_cfg: forwarded to the stem ``ConvModule``.
        act_cfg: forwarded to the stem ``ConvModule``.
        norm_eval: stored as ``self.norm_eval``.
        with_cp: stored as ``self.with_cp``.
        init_cfg: passed to the parent constructor and stored.

    Raises:
        ValueError: for an out-of-range entry of ``out_indices``, an
            out-of-range ``frozen_stages`` or an unsupported ``groups``.
    """
    super(ShuffleNetV1, self).__init__(init_cfg)
    self.init_cfg = init_cfg
    self.stage_blocks = [4, 8, 4]
    self.groups = groups

    valid_stages = range(0, 3)
    for index in out_indices:
        if index in valid_stages:
            continue
        raise ValueError('the item in out_indices must in '
                         f'range(0, 3). But received {index}')

    if frozen_stages not in range(-1, 3):
        raise ValueError('frozen_stages must be in range(-1, 3). '
                         f'But received {frozen_stages}')

    self.out_indices = out_indices
    self.frozen_stages = frozen_stages
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.norm_eval = norm_eval
    self.with_cp = with_cp

    # Base stage widths for each supported group count.
    width_table = (
        (1, (144, 288, 576)),
        (2, (200, 400, 800)),
        (3, (240, 480, 960)),
        (4, (272, 544, 1088)),
        (8, (384, 768, 1536)),
    )
    for supported_groups, widths in width_table:
        if groups == supported_groups:
            channels = widths
            break
    else:
        raise ValueError(f'{groups} groups is not supported for 1x1 '
                         'Grouped Convolutions')

    channels = [make_divisible(ch * widen_factor, 8) for ch in channels]

    # Stem: 3x3 stride-2 convolution followed by a 3x3 stride-2 max pool.
    self.in_channels = int(24 * widen_factor)
    self.conv1 = ConvModule(
        in_channels=3,
        out_channels=self.in_channels,
        kernel_size=3,
        stride=2,
        padding=1,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        act_cfg=act_cfg)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Stages: only the first one is built with first_block=True.
    self.layers = nn.ModuleList()
    for stage_idx, num_blocks in enumerate(self.stage_blocks):
        is_first = stage_idx == 0
        stage = self.make_layer(channels[stage_idx], num_blocks, is_first)
        self.layers.append(stage)
def __init__(self, in_size, out_size, norm_layer, need_bias, pad, dilation):
    """Create the encoder step: a ``unetConv2`` block and a 2x2 max pool.

    All six arguments are forwarded, in order, to ``unetConv2``.
    """
    super(unetDown, self).__init__()

    conv_args = (in_size, out_size, norm_layer, need_bias, pad, dilation)
    self.conv = unetConv2(*conv_args)
    self.down = nn.MaxPool2d(2, 2)
def __init__(self, conv_body_func, fpn_level_info, P2only = False):
    """Build the FPN modules on top of a backbone.

    Args:
        conv_body_func: zero-argument callable that returns the backbone
            (e.g. a ResNet body); it is invoked last, after this module's
            own weight initialisation.
        fpn_level_info: object exposing ``blobs``, ``dims`` and
            ``spatial_scales`` for the backbone levels, coarsest first.
        P2only: if true, ``self.spatial_scale`` is reduced to the scale of
            the finest level only.

    Configuration is read from the global ``cfg`` (``cfg.FPN.DIM``,
    ``cfg.FPN.USE_GN``, ``cfg.FPN.EXTRA_CONV_LEVELS``,
    ``cfg.GROUP_NORM.EPSILON``).
    """
    super().__init__()
    self.fpn_level_info = fpn_level_info
    self.P2only = P2only

    self.dim_out = fpn_dim = cfg.FPN.DIM
    min_level, max_level = get_min_max_levels()
    self.num_backbone_stages = len(fpn_level_info.blobs) - (min_level - LOWEST_BACKBONE_LVL)
    fpn_dim_lateral = fpn_level_info.dims
    self.spatial_scale = []  # a list of scales for FPN outputs

    #
    # Step 1: recursively build down starting from the coarsest backbone level
    #
    # For the coarsest backbone level: 1x1 conv only seeds recursion.
    # (A plain conv used to be built here and then immediately replaced by
    # the branch below; that redundant construction has been removed.)
    if cfg.FPN.USE_GN:
        self.conv_top = nn.Sequential(
            nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0, bias = False),
            nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                         eps = cfg.GROUP_NORM.EPSILON)
        )
    else:
        self.conv_top = nn.Conv2d(fpn_dim_lateral[0], fpn_dim, 1, 1, 0)
    self.topdown_lateral_modules = nn.ModuleList()
    self.posthoc_modules = nn.ModuleList()

    # For other levels add top-down and lateral connections
    for i in range(self.num_backbone_stages - 1):
        self.topdown_lateral_modules.append(
            topdown_lateral_module(fpn_dim, fpn_dim_lateral[i + 1])
        )

    # Post-hoc scale-specific 3x3 convs
    for i in range(self.num_backbone_stages):
        if cfg.FPN.USE_GN:
            self.posthoc_modules.append(nn.Sequential(
                nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1, bias = False),
                nn.GroupNorm(net_utils.get_group_gn(fpn_dim), fpn_dim,
                             eps = cfg.GROUP_NORM.EPSILON)
            ))
        else:
            self.posthoc_modules.append(
                nn.Conv2d(fpn_dim, fpn_dim, 3, 1, 1)
            )

        self.spatial_scale.append(fpn_level_info.spatial_scales[i])

    #
    # Step 2: build up starting from the coarsest backbone level
    #
    # Check if we need the P6 feature map
    if not cfg.FPN.EXTRA_CONV_LEVELS and max_level == HIGHEST_BACKBONE_LVL + 1:
        # Original FPN P6 level implementation from our CVPR'17 FPN paper
        # Use max pooling to simulate stride 2 subsampling
        self.maxpool_p6 = nn.MaxPool2d(kernel_size = 1, stride = 2, padding = 0)
        self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    # Coarser FPN levels introduced for RetinaNet
    if cfg.FPN.EXTRA_CONV_LEVELS and max_level > HIGHEST_BACKBONE_LVL:
        self.extra_pyramid_modules = nn.ModuleList()
        dim_in = fpn_level_info.dims[0]
        for i in range(HIGHEST_BACKBONE_LVL + 1, max_level + 1):
            # Fix: the list was previously *called* with the new conv,
            # which raises instead of registering it; append it.
            self.extra_pyramid_modules.append(
                nn.Conv2d(dim_in, fpn_dim, 3, 2, 1)
            )
            dim_in = fpn_dim
            self.spatial_scale.insert(0, self.spatial_scale[0] * 0.5)

    if self.P2only:
        # use only the finest level
        self.spatial_scale = self.spatial_scale[-1]

    self._init_weights()

    # Deliberately add conv_body after _init_weights.
    # conv_body has its own _init_weights function
    self.conv_body = conv_body_func()  # e.g resnet
def __init__(self, k, stages):
    """Create the layers of all six stages of the network.

    Args:
        k: stored as ``self.k``; prediction layers output ``k + 1``
            channels and refinement inputs have ``32 + k + 2`` channels.
        stages: stored as ``self.stages``. Layers for stages 1-6 are
            always created, regardless of this value.
    """
    super(CPM, self).__init__()
    self.k = k
    self.stages = stages

    n_maps = self.k + 1         # channels of every stage's prediction
    n_fused = 32 + self.k + 2   # channels entering each refinement stack

    def conv(c_in, c_out, kernel, pad):
        return nn.Conv2d(c_in, c_out, kernel_size=kernel, padding=pad)

    def pool():
        return nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    def add_refinement(stage):
        # Registers Mconv1_stage<N> .. Mconv5_stage<N>, in that order.
        specs = (
            (n_fused, 128, 11, 5),
            (128, 128, 11, 5),
            (128, 128, 11, 5),
            (128, 128, 1, 0),
            (128, n_maps, 1, 0),
        )
        for index, (c_in, c_out, kernel, pad) in enumerate(specs, start=1):
            setattr(self, 'Mconv%d_stage%d' % (index, stage),
                    conv(c_in, c_out, kernel, pad))

    self.pool_center = nn.AvgPool2d(kernel_size=9, stride=8, padding=1)

    # Stage 1: feature trunk followed by the first prediction layers.
    self.conv1_stage1 = conv(3, 128, 9, 4)
    self.pool1_stage1 = pool()
    self.conv2_stage1 = conv(128, 128, 9, 4)
    self.pool2_stage1 = pool()
    self.conv3_stage1 = conv(128, 128, 9, 4)
    self.pool3_stage1 = pool()
    self.conv4_stage1 = conv(128, 32, 5, 2)
    self.conv5_stage1 = conv(32, 512, 9, 4)
    self.conv6_stage1 = nn.Conv2d(512, 512, kernel_size=1)
    self.conv7_stage1 = nn.Conv2d(512, n_maps, kernel_size=1)

    # Stage 2: its own feature trunk, then a refinement stack.
    self.conv1_stage2 = conv(3, 128, 9, 4)
    self.pool1_stage2 = pool()
    self.conv2_stage2 = conv(128, 128, 9, 4)
    self.pool2_stage2 = pool()
    self.conv3_stage2 = conv(128, 128, 9, 4)
    self.pool3_stage2 = pool()
    self.conv4_stage2 = conv(128, 32, 5, 2)
    add_refinement(2)

    # Stages 3-6: one 5x5 conv (conv1_stage<N>) plus a refinement stack.
    for stage in range(3, 7):
        setattr(self, 'conv1_stage%d' % stage, conv(128, 32, 5, 2))
        add_refinement(stage)
def __init__(self, in_channels, out_channels, first_block=False, epsilon=1e-4, attention=True, freeze_params=False):
    """
    Args:
        in_channels: list of input channel counts; indices 0-3 are read
            (for the p2-p5 projections) when ``first_block`` is true.
        out_channels: channel count used by every layer of this block.
        first_block: whether the input comes directly from the efficientnet,
            if True, downchannel it first, and downsample P5 to generate P6
        epsilon: epsilon of fast weighted attention sum of BiFPN, not the BN's epsilon
        attention: stored on ``self.attention``; not used by the constructor.
        freeze_params: if true, the fusion weights are created with
            ``requires_grad=False``, the flag is forwarded to every
            ``SeparableConvBlock``, and the first-block projection layers
            (when present) are frozen.

    Raises:
        AssertionError: if ``in_channels`` is not a list.
    """
    super(SingleBiFPN, self).__init__()
    assert isinstance(in_channels, list)

    self.first_block = first_block
    self.epsilon = epsilon
    self.attention = attention
    self.freeze_params = freeze_params

    def down_channel(level_index, *extra):
        # 1x1 projection + BN (+ optional trailing layers) for one level.
        return nn.Sequential(
            Conv2d(in_channels[level_index], out_channels, 1),
            nn.BatchNorm2d(out_channels, momentum=0.01, eps=1e-3),
            *extra
        )

    if self.first_block:
        self.p2_down_channel = down_channel(0)
        self.p3_down_channel = down_channel(1)
        self.p4_down_channel = down_channel(2)
        self.p5_down_channel = down_channel(3)
        self.p5_to_p6 = down_channel(3, nn.MaxPool2d(3, 2, padding=1))
        self.p3_down_channel_2 = down_channel(1)
        self.p4_down_channel_2 = down_channel(2)
        self.p5_down_channel_2 = down_channel(3)

    # Conv layers
    self.conv5_up = SeparableConvBlock(out_channels, freeze_params=self.freeze_params)
    self.conv4_up = SeparableConvBlock(out_channels, freeze_params=self.freeze_params)
    self.conv3_up = SeparableConvBlock(out_channels, freeze_params=self.freeze_params)
    self.conv2_up = SeparableConvBlock(out_channels, freeze_params=self.freeze_params)
    self.conv3_down = SeparableConvBlock(out_channels, freeze_params=self.freeze_params)
    self.conv4_down = SeparableConvBlock(out_channels, freeze_params=self.freeze_params)
    self.conv5_down = SeparableConvBlock(out_channels, freeze_params=self.freeze_params)
    self.conv6_down = SeparableConvBlock(out_channels, freeze_params=self.freeze_params)

    # top-down (upsample to target phase's by nearest interpolation)
    self.p5_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.p4_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.p3_upsample = nn.Upsample(scale_factor=2, mode='nearest')
    self.p2_upsample = nn.Upsample(scale_factor=2, mode='nearest')

    # bottom-up (downsample to target phase's by pooling)
    self.p3_downsample = nn.MaxPool2d(3, 2, padding=1)
    self.p4_downsample = nn.MaxPool2d(3, 2, padding=1)
    self.p5_downsample = nn.MaxPool2d(3, 2, padding=1)
    self.p6_downsample = nn.MaxPool2d(3, 2, padding=1)

    self.relu = nn.ReLU()

    # Fusion weights: one scalar per fused input, trainable unless frozen.
    trainable = not self.freeze_params

    def fusion_weight(n_inputs):
        return nn.Parameter(torch.ones(n_inputs, dtype=torch.float32),
                            requires_grad=trainable)

    self.p5_w1 = fusion_weight(2)
    self.p5_w1_relu = nn.ReLU()
    self.p4_w1 = fusion_weight(2)
    self.p4_w1_relu = nn.ReLU()
    self.p3_w1 = fusion_weight(2)
    self.p3_w1_relu = nn.ReLU()
    self.p2_w1 = fusion_weight(2)
    self.p2_w1_relu = nn.ReLU()

    self.p3_w2 = fusion_weight(3)
    self.p3_w2_relu = nn.ReLU()
    self.p4_w2 = fusion_weight(3)
    self.p4_w2_relu = nn.ReLU()
    self.p5_w2 = fusion_weight(3)
    self.p5_w2_relu = nn.ReLU()
    self.p6_w2 = fusion_weight(2)
    self.p6_w2_relu = nn.ReLU()

    # The projection layers below only exist for the first block, so the
    # freeze pass must be skipped otherwise (accessing them would raise
    # AttributeError).
    if self.freeze_params and self.first_block:
        projections = [
            self.p2_down_channel, self.p3_down_channel,
            self.p4_down_channel, self.p5_down_channel, self.p5_to_p6,
            self.p3_down_channel_2, self.p4_down_channel_2,
            self.p5_down_channel_2,
        ]
        for module in projections:
            for param in module.parameters():
                param.requires_grad = False
def __init__(self, in_channels=3, n_classes=1, feature_scale=4, is_deconv=True, is_batchnorm=True):
    """Create the encoder, full-scale decoder and output heads.

    Args:
        in_channels: channel count of the input fed to the first encoder
            block.
        n_classes: output channels of each of the five ``outconv*`` heads.
        feature_scale: stored on ``self.feature_scale``; not used here.
        is_deconv: stored on ``self.is_deconv``; not used here.
        is_batchnorm: forwarded to every ``unetConv2`` encoder block.
    """
    super(UNet_3Plus_DeepSup_CGM, self).__init__()
    self.is_deconv = is_deconv
    self.in_channels = in_channels
    self.is_batchnorm = is_batchnorm
    self.feature_scale = feature_scale

    filters = [64, 128, 256, 512, 1024]

    # ------------- Encoder -------------
    self.conv1 = unetConv2(self.in_channels, filters[0], self.is_batchnorm)
    self.maxpool1 = nn.MaxPool2d(kernel_size=2)

    self.conv2 = unetConv2(filters[0], filters[1], self.is_batchnorm)
    self.maxpool2 = nn.MaxPool2d(kernel_size=2)

    self.conv3 = unetConv2(filters[1], filters[2], self.is_batchnorm)
    self.maxpool3 = nn.MaxPool2d(kernel_size=2)

    self.conv4 = unetConv2(filters[2], filters[3], self.is_batchnorm)
    self.maxpool4 = nn.MaxPool2d(kernel_size=2)

    self.conv5 = unetConv2(filters[3], filters[4], self.is_batchnorm)

    # ------------- Decoder -------------
    self.CatChannels = filters[0]
    self.CatBlocks = 5
    self.UpChannels = self.CatChannels * self.CatBlocks
    cat_ch = self.CatChannels
    up_ch = self.UpChannels

    def pool(factor):
        return nn.MaxPool2d(factor, factor, ceil_mode=True)

    def up(factor):
        return nn.Upsample(scale_factor=factor, mode='bilinear')

    def add_branch(name, source_channels, resample):
        # Registers <name> (only when a resampler is given), then
        # <name>_conv, <name>_bn and <name>_relu, in that order.
        if resample is not None:
            setattr(self, name, resample)
        setattr(self, name + '_conv',
                nn.Conv2d(source_channels, cat_ch, 3, padding=1))
        setattr(self, name + '_bn', nn.BatchNorm2d(cat_ch))
        setattr(self, name + '_relu', nn.ReLU(inplace=True))

    def add_fusion(tag):
        # Registers conv<tag>, bn<tag>, relu<tag> over the concatenation.
        setattr(self, 'conv' + tag, nn.Conv2d(up_ch, up_ch, 3, padding=1))
        setattr(self, 'bn' + tag, nn.BatchNorm2d(up_ch))
        setattr(self, 'relu' + tag, nn.ReLU(inplace=True))

    # One entry per decoder stage: (fusion tag, five input branches).
    # Each branch is (attribute prefix, source channels, resampler); a
    # ``None`` resampler marks the same-scale "Cat" branch.
    decoder_plan = [
        ('4d_1', [
            ('h1_PT_hd4', filters[0], pool(8)),
            ('h2_PT_hd4', filters[1], pool(4)),
            ('h3_PT_hd4', filters[2], pool(2)),
            ('h4_Cat_hd4', filters[3], None),
            ('hd5_UT_hd4', filters[4], up(2)),
        ]),
        ('3d_1', [
            ('h1_PT_hd3', filters[0], pool(4)),
            ('h2_PT_hd3', filters[1], pool(2)),
            ('h3_Cat_hd3', filters[2], None),
            ('hd4_UT_hd3', up_ch, up(2)),
            ('hd5_UT_hd3', filters[4], up(4)),
        ]),
        ('2d_1', [
            ('h1_PT_hd2', filters[0], pool(2)),
            ('h2_Cat_hd2', filters[1], None),
            ('hd3_UT_hd2', up_ch, up(2)),
            ('hd4_UT_hd2', up_ch, up(4)),
            ('hd5_UT_hd2', filters[4], up(8)),
        ]),
        ('1d_1', [
            ('h1_Cat_hd1', filters[0], None),
            ('hd2_UT_hd1', up_ch, up(2)),
            ('hd3_UT_hd1', up_ch, up(4)),
            ('hd4_UT_hd1', up_ch, up(8)),
            ('hd5_UT_hd1', filters[4], up(16)),
        ]),
    ]
    for fusion_tag, branches in decoder_plan:
        for name, source_channels, resample in branches:
            add_branch(name, source_channels, resample)
        add_fusion(fusion_tag)

    # ------------- Bilinear upsampling -------------
    self.upscore6 = up(32)
    self.upscore5 = up(16)
    self.upscore4 = up(8)
    self.upscore3 = up(4)
    self.upscore2 = up(2)

    # ------------- Deep-supervision heads -------------
    self.outconv1 = nn.Conv2d(up_ch, n_classes, 3, padding=1)
    self.outconv2 = nn.Conv2d(up_ch, n_classes, 3, padding=1)
    self.outconv3 = nn.Conv2d(up_ch, n_classes, 3, padding=1)
    self.outconv4 = nn.Conv2d(up_ch, n_classes, 3, padding=1)
    self.outconv5 = nn.Conv2d(filters[4], n_classes, 3, padding=1)

    # Two-channel classification branch on the deepest encoder features.
    self.cls = nn.Sequential(
        nn.Dropout(p=0.5),
        nn.Conv2d(filters[4], 2, 1),
        nn.AdaptiveMaxPool2d(1),
        nn.Sigmoid(),
    )

    # Initialise every convolution and batch-norm layer.
    for module in self.modules():
        if isinstance(module, (nn.Conv2d, nn.BatchNorm2d)):
            init_weights(module, init_type='kaiming')
def __init__(self):
    """Create the eight layer groups of a batch-normalised VGG16.

    ``layer1``-``layer5`` are convolutional groups (conv -> BN -> ReLU,
    repeated, then a 2x2 max pool). ``layer6``/``layer7`` are 4096-wide
    fully connected groups and ``layer8`` is the two-way output with a
    softmax over dimension 1.
    """
    super(VGG16, self).__init__()

    def conv_bn_relu(in_channels, out_channels):
        return [
            nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]

    def conv_group(channel_pairs):
        # conv/BN/ReLU triples followed by one pooling layer; indices inside
        # the Sequential stay 0..3n so state-dict keys are unchanged.
        layers = []
        for in_channels, out_channels in channel_pairs:
            layers.extend(conv_bn_relu(in_channels, out_channels))
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        return nn.Sequential(*layers)

    # The first two layers previously used a different alias (``tnn``) from
    # the rest of the constructor; everything now goes through ``nn``.
    self.layer1 = conv_group([(3, 64), (64, 64)])
    self.layer2 = conv_group([(64, 128), (128, 128)])
    self.layer3 = conv_group([(128, 256), (256, 256), (256, 256)])
    self.layer4 = conv_group([(256, 512), (512, 512), (512, 512)])
    self.layer5 = conv_group([(512, 512), (512, 512), (512, 512)])

    # Fully connected groups. Dropout is omitted since batch normalization
    # is used.
    self.layer6 = nn.Sequential(
        nn.Linear(512 * 7 * 7, 4096),
        nn.BatchNorm1d(4096),
        nn.ReLU())

    self.layer7 = nn.Sequential(
        nn.Linear(4096, 4096),
        nn.BatchNorm1d(4096),
        nn.ReLU())

    # Output group. ``dim=1`` is what the implicit-dim softmax picked for a
    # 2-D (batch, features) input; stating it avoids the deprecation warning.
    self.layer8 = nn.Sequential(
        nn.Linear(4096, 2),
        nn.BatchNorm1d(2),
        nn.Softmax(dim=1))
def __init__(self, n_classes=21, learned_billinear=False):
    """Create the FCN-16s layers.

    Args:
        n_classes: output channels of the classifier and of the pool4
            scoring convolution.
        learned_billinear: learned upsampling is not supported; a truthy
            value raises ``NotImplementedError`` after the layers are built.

    Raises:
        NotImplementedError: if ``learned_billinear`` is truthy.
    """
    super(fcn16s, self).__init__()
    self.learned_billinear = learned_billinear
    self.n_classes = n_classes
    self.loss = functools.partial(cross_entropy2d, size_average=False)

    def vgg_block(in_channels, out_channels, n_convs, first_padding=1):
        # n_convs x (3x3 conv + ReLU) followed by a ceil-mode 2x2 max pool.
        layers = []
        for index in range(n_convs):
            is_first = index == 0
            layers.append(nn.Conv2d(
                in_channels if is_first else out_channels,
                out_channels,
                3,
                padding=first_padding if is_first else 1,
            ))
            layers.append(nn.ReLU(inplace=True))
        layers.append(nn.MaxPool2d(2, stride=2, ceil_mode=True))
        return nn.Sequential(*layers)

    # The very first convolution pads by 100 pixels.
    self.conv_block1 = vgg_block(3, 64, 2, first_padding=100)
    self.conv_block2 = vgg_block(64, 128, 2)
    self.conv_block3 = vgg_block(128, 256, 3)
    self.conv_block4 = vgg_block(256, 512, 3)
    self.conv_block5 = vgg_block(512, 512, 3)

    self.classifier = nn.Sequential(
        nn.Conv2d(512, 4096, 7),
        nn.ReLU(inplace=True),
        nn.Dropout2d(),
        nn.Conv2d(4096, 4096, 1),
        nn.ReLU(inplace=True),
        nn.Dropout2d(),
        nn.Conv2d(4096, self.n_classes, 1),
    )

    self.score_pool4 = nn.Conv2d(512, self.n_classes, 1)

    # TODO: Add support for learned upsampling
    if self.learned_billinear:
        raise NotImplementedError
def __init__(self, num_classes=1001):
    """Create the Inception-ResNet-v2 layers.

    Args:
        num_classes: output features of the final linear classifier.
    """
    super(InceptionResnetV2, self).__init__()

    # Stem
    self.conv2d_1a = BasicConv2d(3, 32, kernel_size=3, stride=2)
    self.conv2d_2a = BasicConv2d(32, 32, kernel_size=3, stride=1)
    self.conv2d_2b = BasicConv2d(32, 64, kernel_size=3, stride=1, padding=1)
    self.maxpool_3a = nn.MaxPool2d(3, stride=2)
    self.conv2d_3b = BasicConv2d(64, 80, kernel_size=1, stride=1)
    self.conv2d_4a = BasicConv2d(80, 192, kernel_size=3, stride=1)
    self.maxpool_5a = nn.MaxPool2d(3, stride=2)

    # 10 x Block35
    self.mixed_5b = Mixed_5b()
    self.repeat = nn.Sequential(
        *[Block35(scale=0.17) for _ in range(10)]
    )

    # 20 x Block17
    self.mixed_6a = Mixed_6a()
    self.repeat_1 = nn.Sequential(
        *[Block17(scale=0.10) for _ in range(20)]
    )

    # 9 x Block8, then one more Block8 built with noReLU=True
    self.mixed_7a = Mixed_7a()
    self.repeat_2 = nn.Sequential(
        *[Block8(scale=0.20) for _ in range(9)]
    )
    self.block8 = Block8(noReLU=True)

    # Head
    self.conv2d_7b = BasicConv2d(2080, 1536, kernel_size=1, stride=1)
    self.avgpool_1a = nn.AdaptiveAvgPool2d((1,1))
    self.classif = nn.Linear(1536, num_classes)
def __init__(self, n_classes=21, learned_billinear=True):
    """Create the FCN-8s layers.

    Args:
        n_classes: output channels of the classifier, of the pool3/pool4
            scoring convolutions and of the transposed convolutions.
        learned_billinear: if truthy, three bias-free transposed
            convolutions (``upscore2``, ``upscore4``, ``upscore8``) are
            created; their weights are then overwritten with the result of
            ``get_upsampling_weight``.
    """
    super(fcn8s, self).__init__()
    self.learned_billinear = learned_billinear
    self.n_classes = n_classes
    self.loss = functools.partial(cross_entropy2d, size_average=False)

    def vgg_block(in_channels, out_channels, n_convs, first_padding=1):
        # n_convs x (3x3 conv + ReLU) followed by a ceil-mode 2x2 max pool.
        layers = []
        for index in range(n_convs):
            is_first = index == 0
            layers.append(nn.Conv2d(
                in_channels if is_first else out_channels,
                out_channels,
                3,
                padding=first_padding if is_first else 1,
            ))
            layers.append(nn.ReLU(inplace=True))
        layers.append(nn.MaxPool2d(2, stride=2, ceil_mode=True))
        return nn.Sequential(*layers)

    # The very first convolution pads by 100 pixels.
    self.conv_block1 = vgg_block(3, 64, 2, first_padding=100)
    self.conv_block2 = vgg_block(64, 128, 2)
    self.conv_block3 = vgg_block(128, 256, 3)
    self.conv_block4 = vgg_block(256, 512, 3)
    self.conv_block5 = vgg_block(512, 512, 3)

    self.classifier = nn.Sequential(
        nn.Conv2d(512, 4096, 7),
        nn.ReLU(inplace=True),
        nn.Dropout2d(),
        nn.Conv2d(4096, 4096, 1),
        nn.ReLU(inplace=True),
        nn.Dropout2d(),
        nn.Conv2d(4096, self.n_classes, 1),
    )

    self.score_pool4 = nn.Conv2d(512, self.n_classes, 1)
    self.score_pool3 = nn.Conv2d(256, self.n_classes, 1)

    if self.learned_billinear:
        classes = self.n_classes
        self.upscore2 = nn.ConvTranspose2d(classes, classes, 4,
                                           stride=2, bias=False)
        self.upscore4 = nn.ConvTranspose2d(classes, classes, 4,
                                           stride=2, bias=False)
        self.upscore8 = nn.ConvTranspose2d(classes, classes, 16,
                                           stride=8, bias=False)

    # Overwrite the weights of every transposed convolution (there are none
    # unless learned upsampling was requested above).
    for module in self.modules():
        if not isinstance(module, nn.ConvTranspose2d):
            continue
        kernel = get_upsampling_weight(module.in_channels,
                                       module.out_channels,
                                       module.kernel_size[0])
        module.weight.data.copy_(kernel)
def __init__(self):
    """Create one 5x5 convolution, a 2x2 max pool and a linear classifier."""
    super(Model, self).__init__()
    self.conv = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5)
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
    # NOTE(review): 2304 presumably equals 16 * 12 * 12, i.e. a 28x28 input
    # after the convolution and pooling above -- confirm against forward().
    self.fc = nn.Linear(in_features=2304, out_features=10)
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    Args:
        module_defs: list of dicts. The first entry holds the network
            hyperparameters ("channels" is always read, "height" when a
            yolo layer is present); every following entry describes one
            layer through its "type" key. The first entry is removed from
            the list in place.

    Returns:
        Tuple ``(hyperparams, module_list)``: the popped hyperparameter dict
        and an ``nn.ModuleList`` holding one ``nn.Sequential`` per layer
        definition. Layers of an unrecognised type yield an empty
        ``nn.Sequential``.
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()
    # Seed with the input channel count so that a network whose first layer
    # does not set ``filters`` (maxpool, upsample, yolo) no longer fails
    # with UnboundLocalError when the layer's output width is recorded.
    filters = output_filters[-1]

    for module_i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        layer_type = module_def["type"]

        if layer_type == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    # The batch-norm layer supplies the shift when present.
                    bias=not bn,
                ),
            )
            if bn:
                modules.add_module(
                    f"batch_norm_{module_i}",
                    nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5),
                )
            if module_def["activation"] == "leaky":
                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad right/bottom by one before the size-2, stride-1 pool.
                modules.add_module(f"_debug_padding_{module_i}",
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride,
                                   padding=(kernel_size - 1) // 2)
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif layer_type == "upsample":
            upsample = Upsample(scale_factor=int(module_def["stride"]),
                                mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif layer_type == "route":
            layers = [int(x) for x in module_def["layers"].split(",")]
            # Skip the leading input-channel entry so that index i refers
            # to the output of layer i (negative indices count back).
            layer_outputs = output_filters[1:]
            filters = sum(layer_outputs[i] for i in layers)
            modules.add_module(f"route_{module_i}", EmptyLayer())

        elif layer_type == "shortcut":
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", EmptyLayer())

        elif layer_type == "yolo":
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors: flat list -> (a, b) pairs -> masked subset.
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            anchors = [(anchors[i], anchors[i + 1])
                       for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])
            img_size = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module(f"yolo_{module_i}", yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
def __init__(self, inp=10, out=16, kernel_size=3, bias=True):
    """Create a convolution followed by a fixed 3x3 max pool.

    Args:
        inp: input channels of the convolution.
        out: output channels of the convolution.
        kernel_size: kernel size of the convolution only; the pooling
            kernel is always 3.
        bias: whether the convolution has a bias term.
    """
    super(MaxPool, self).__init__()
    conv_options = dict(kernel_size=kernel_size, bias=bias)
    self.conv2d = nn.Conv2d(inp, out, **conv_options)
    self.pool = nn.MaxPool2d(3, padding=1)
def create_modules(module_defs):  # build the whole network pipeline
    """
    Constructs module list of layer blocks from module configuration in module_defs

    Args:
        module_defs (List): list of dicts. The first entry holds the network
            hyperparameters; every following entry describes one layer. The
            first entry is removed from the list in place.

    Returns:
        dict: hyperparams, the network hyperparameters
        torch.nn.ModuleList: module_list, one ``nn.Sequential`` per layer
            definition (empty for unrecognised layer types)
    """
    # The first dict holds the network hyperparameters and sets the initial
    # input width.
    hyperparams = module_defs.pop(0)
    # Output channel count of every layer so far; route and shortcut layers
    # use it to work out their own output width.
    output_filters = [int(hyperparams["channels"])]
    # Holds every layer; returned for use in the forward pass.
    module_list = nn.ModuleList()
    # Seed with the input channel count so that a network whose first layer
    # does not set ``filters`` (maxpool, upsample, yolo) no longer fails
    # with UnboundLocalError when the layer's output width is recorded.
    filters = output_filters[-1]

    # The running index is used to name each layer.
    for module_i, module_def in enumerate(module_defs):
        modules = nn.Sequential()  # every layer is wrapped in a Sequential
        layer_type = module_def["type"]

        if layer_type == "convolutional":
            bn = int(module_def["batch_normalize"])
            filters = int(module_def["filters"])
            kernel_size = int(module_def["size"])
            pad = (kernel_size - 1) // 2
            modules.add_module(
                f"conv_{module_i}",
                nn.Conv2d(
                    in_channels=output_filters[-1],
                    out_channels=filters,
                    kernel_size=kernel_size,
                    stride=int(module_def["stride"]),
                    padding=pad,
                    bias=not bn,
                ),
            )
            if bn:  # convolution followed by batch normalisation
                modules.add_module(
                    f"batch_norm_{module_i}",
                    nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5),
                )
            if module_def["activation"] == "leaky":  # Leaky ReLU requested
                modules.add_module(f"leaky_{module_i}", nn.LeakyReLU(0.1))

        elif layer_type == "maxpool":
            kernel_size = int(module_def["size"])
            stride = int(module_def["stride"])
            if kernel_size == 2 and stride == 1:
                # Pad right/bottom by one before the size-2, stride-1 pool.
                modules.add_module(f"_debug_padding_{module_i}",
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size, stride=stride,
                                   padding=(kernel_size - 1) // 2)
            modules.add_module(f"maxpool_{module_i}", maxpool)

        elif layer_type == "upsample":
            # Upsampling layer configured with a scale factor and a mode.
            upsample = Upsample(scale_factor=int(module_def["stride"]),
                                mode="nearest")
            modules.add_module(f"upsample_{module_i}", upsample)

        elif layer_type == "route":
            # Layers being routed together; their channel counts add up.
            layers = [int(x) for x in module_def["layers"].split(",")]
            # [1:] because the first entry is the network input width, not
            # the output of a layer; layer outputs start at the second entry.
            layer_outputs = output_filters[1:]
            filters = sum(layer_outputs[i] for i in layers)
            modules.add_module(f"route_{module_i}", EmptyLayer())

        elif layer_type == "shortcut":
            # Element-wise addition, so the channel count is that of the
            # referenced layer.
            filters = output_filters[1:][int(module_def["from"])]
            modules.add_module(f"shortcut_{module_i}", EmptyLayer())

        elif layer_type == "yolo":
            # Indices of the anchors this detection layer uses; each
            # detection layer selects a different subset.
            anchor_idxs = [int(x) for x in module_def["mask"].split(",")]
            # Extract anchors
            anchors = [int(x) for x in module_def["anchors"].split(",")]
            # e.g. [(10,13),(16,30),(33,23),(30,61)...]
            anchors = [(anchors[i], anchors[i + 1])
                       for i in range(0, len(anchors), 2)]
            anchors = [anchors[i] for i in anchor_idxs]
            num_classes = int(module_def["classes"])  # classes predicted here
            # Size of the image fed to the network, passed on to the
            # detection layer.
            img_size = int(hyperparams["height"])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module(f"yolo_{module_i}", yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list
def __init__(self, image_dim=128, memory_dim=128, instr_dim=128, num_embeddings=3, num_rnn_layers=1, vocabulary=None, max_tau=0.2, greedy=True, corr_length=2, var_len=False, script=False, obs_space=None):
    """Create the vocabulary tables and the sub-networks of the corrector.

    Args:
        image_dim: output features of the last FiLM controller and input
            size of the memory LSTM cell.
        memory_dim: hidden size of the memory LSTM cell and of the decoder
            GRU; input size of the output and tau layers.
        instr_dim: embedding size for instructions and for correction words.
        num_embeddings: number of correction words; increased by one when
            ``var_len`` is true to make room for ``'<eos>'``.
        num_rnn_layers: number of layers of the decoder GRU.
        vocabulary: optional vocabulary object providing ``idx2word`` and
            item lookup of ``'<S>'``. When ``None`` a synthetic vocabulary
            ``w0, w1, ...`` is generated.
        max_tau: stored on ``self.max_tau``.
        greedy: stored on ``self.greedy``.
        corr_length: maximum length of a correction message.
        var_len: enables variable-length corrections (adds the EOS symbol
            and a cross-entropy loss).
        script: stored on ``self.script``.
        obs_space: mapping indexed with ``"instr"`` to size the instruction
            embedding. NOTE(review): the default ``None`` cannot be indexed,
            so callers presumably always pass a value -- confirm.
    """
    super().__init__()
    self.image_dim = image_dim
    self.memory_dim = memory_dim
    self.instr_dim = instr_dim
    self.num_embeddings = num_embeddings
    self.num_rnn_layers = num_rnn_layers
    self.obs_space = obs_space
    self.var_len = var_len  # variable correction lengths

    if vocabulary is not None:
        self.vocab = vocabulary  # Vocabulary object, from obss_preprocessor / None
        self.vocab_idx2word = self.vocab.idx2word
        # Add SOS symbol to vocab/get idx
        self.sos_id = self.vocab['<S>']
    else:
        # if Corrector gets to use own vocabulary (standard)
        self.vocab_idx2word = {
            i: 'w' + str(i)
            for i in range(self.num_embeddings)
        }
        self.sos_id = 0
    # NOTE(review): this block is read as applying to both vocabulary
    # branches; with an external vocabulary it then writes into the same
    # dict object as ``self.vocab.idx2word`` -- confirm that is intended.
    if self.var_len:
        # Append the end-of-sequence symbol after the existing words.
        self.vocab_idx2word[self.num_embeddings] = '<eos>'
        self.eos_id = self.num_embeddings
        self.num_embeddings += 1
    # Reverse lookup: word -> index.
    self.vocab_word2idx = {
        self.vocab_idx2word[key]: key
        for key in self.vocab_idx2word
    }

    # Instruction encoder: embedding followed by a single GRU.
    self.instr_embedding = nn.Embedding(obs_space["instr"], self.instr_dim)
    self.instr_rnn = nn.GRU(self.instr_dim,
                            self.instr_dim,
                            batch_first=True)

    # Image encoder: two conv/BN/ReLU/pool groups with 128 channels each.
    self.image_conv = nn.Sequential(
        nn.Conv2d(in_channels=3,
                  out_channels=128,
                  kernel_size=(2, 2),
                  padding=1), nn.BatchNorm2d(128), nn.ReLU(),
        nn.MaxPool2d(kernel_size=(2, 2), stride=2),
        nn.Conv2d(in_channels=128,
                  out_channels=128,
                  kernel_size=(3, 3),
                  padding=1), nn.BatchNorm2d(128), nn.ReLU(),
        nn.MaxPool2d(kernel_size=(2, 2), stride=2))
    self.film_pool = nn.MaxPool2d(kernel_size=(2, 2), stride=2)

    # FiLM controllers. They are kept in a plain list for iteration and
    # registered by name so they count as sub-modules. Only the last one
    # outputs ``image_dim`` features; the others output 128.
    num_module = 2
    self.controllers = []
    for ni in range(num_module):
        if ni < num_module - 1:
            mod = ExpertControllerFiLM(in_features=self.instr_dim,
                                       out_features=128,
                                       in_channels=128,
                                       imm_channels=128)
        else:
            mod = ExpertControllerFiLM(in_features=self.instr_dim,
                                       out_features=self.image_dim,
                                       in_channels=128,
                                       imm_channels=128)
        self.controllers.append(mod)
        self.add_module('FiLM_Controler_' + str(ni), mod)

    self.memory_rnn = nn.LSTMCell(self.image_dim, self.memory_dim)

    # Correction decoder: word embedding, GRU and projection to word logits.
    self.word_embedding_corrector = nn.Embedding(
        num_embeddings=self.num_embeddings, embedding_dim=self.instr_dim)
    self.decoder_rnn = nn.GRU(input_size=self.instr_dim,
                              hidden_size=self.memory_dim,
                              num_layers=self.num_rnn_layers,
                              batch_first=True)
    self.out = nn.Linear(self.memory_dim, self.num_embeddings)

    # learn tau (following https://arxiv.org/pdf/1701.08718.pdf)
    # Gumbel Softmax temperature
    self.tau_layer = nn.Sequential(nn.Linear(self.memory_dim, 1),
                                   nn.Softplus())
    self.max_tau = max_tau

    self.corr_length = corr_length  # maximum length of correction message (if no variable length, always this length)
    self.greedy = greedy
    self.random_corrector = False

    if self.var_len:
        self.correction_loss = nn.CrossEntropyLoss()

    self.script = script

    # Run the initialiser over every registered sub-module.
    self.apply(initialize_parameters)

    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
def create_modules(module_defs):
    """
    Constructs module list of layer blocks from module configuration in module_defs

    Args:
        module_defs: list of dicts. The first entry holds the network
            hyperparameters ('channels' is always read; 'height' and 'cfg'
            when a yolo layer is present); every following entry describes
            one layer through its 'type' key. The first entry is removed
            from the list in place.

    Returns:
        Tuple ``(hyperparams, module_list)``: the popped hyperparameter dict
        and an ``nn.ModuleList`` holding one ``nn.Sequential`` per layer
        definition. Layers of an unrecognised type yield an empty
        ``nn.Sequential``.
    """
    hyperparams = module_defs.pop(0)
    output_filters = [int(hyperparams['channels'])]
    module_list = nn.ModuleList()
    # Seed with the input channel count so that a network whose first layer
    # does not set ``filters`` (maxpool, upsample, yolo) no longer fails
    # with UnboundLocalError when the layer's output width is recorded.
    filters = output_filters[-1]

    for i, module_def in enumerate(module_defs):
        modules = nn.Sequential()
        layer_type = module_def['type']

        if layer_type == 'convolutional':
            bn = int(module_def['batch_normalize'])
            filters = int(module_def['filters'])
            kernel_size = int(module_def['size'])
            pad = (kernel_size - 1) // 2 if int(module_def['pad']) else 0
            modules.add_module(
                'conv_%d' % i,
                nn.Conv2d(in_channels=output_filters[-1],
                          out_channels=filters,
                          kernel_size=kernel_size,
                          stride=int(module_def['stride']),
                          padding=pad,
                          bias=not bn))
            if bn:
                modules.add_module('batch_norm_%d' % i,
                                   nn.BatchNorm2d(filters))
            if module_def['activation'] == 'leaky':
                modules.add_module('leaky_%d' % i, nn.LeakyReLU(0.1))

        elif layer_type == 'maxpool':
            kernel_size = int(module_def['size'])
            stride = int(module_def['stride'])
            if kernel_size == 2 and stride == 1:
                # Pad right/bottom by one before the size-2, stride-1 pool.
                modules.add_module('_debug_padding_%d' % i,
                                   nn.ZeroPad2d((0, 1, 0, 1)))
            maxpool = nn.MaxPool2d(kernel_size=kernel_size,
                                   stride=stride,
                                   padding=(kernel_size - 1) // 2)
            modules.add_module('maxpool_%d' % i, maxpool)

        elif layer_type == 'upsample':
            # The file's own Upsample is used; nn.Upsample was flagged as
            # deprecated by the original author.
            upsample = Upsample(scale_factor=int(module_def['stride']),
                                mode='nearest')
            modules.add_module('upsample_%d' % i, upsample)

        elif layer_type == 'route':
            layers = [int(x) for x in module_def['layers'].split(',')]
            # output_filters[0] is the network input width, so the output of
            # absolute layer n sits at n + 1 -- including n == 0, which the
            # previous ``> 0`` test mapped to the input width. Negative
            # (relative) indices already count from the end.
            filters = sum(
                output_filters[idx + 1 if idx >= 0 else idx]
                for idx in layers)
            modules.add_module('route_%d' % i, EmptyLayer())

        elif layer_type == 'shortcut':
            filters = output_filters[int(module_def['from'])]
            modules.add_module('shortcut_%d' % i, EmptyLayer())

        elif layer_type == 'yolo':
            anchor_idxs = [int(x) for x in module_def['mask'].split(',')]
            # Extract anchors: flat list -> (a, b) pairs -> masked subset.
            anchors = [float(x) for x in module_def['anchors'].split(',')]
            anchors = [(anchors[j], anchors[j + 1])
                       for j in range(0, len(anchors), 2)]
            anchors = [anchors[j] for j in anchor_idxs]
            num_classes = int(module_def['classes'])
            img_height = int(hyperparams['height'])
            # Define detection layer
            yolo_layer = YOLOLayer(anchors,
                                   num_classes,
                                   img_height,
                                   anchor_idxs,
                                   cfg=hyperparams['cfg'])
            modules.add_module('yolo_%d' % i, yolo_layer)

        # Register module list and number of output filters
        module_list.append(modules)
        output_filters.append(filters)

    return hyperparams, module_list