# Build the data loaders. When resuming, the loaded checkpoint is handed to
# load_data as well.
if CHECKPOINT_NAME is not None:
    checkpoint = torch.load(CHECKPOINT_PATH + CHECKPOINT_NAME)
    train_loader, val_loader, test_loader, label_encoder, num_classes = load_data(
        checkpoint)
else:
    train_loader, val_loader, test_loader, label_encoder, num_classes = load_data()

# Pick the ImageNet-pretrained backbone.
if RESNET_SIZE == 50:
    model = torchvision.models.resnet50(pretrained=True)
elif RESNET_SIZE == 101:
    model = torchvision.models.resnet101(pretrained=True)
else:
    raise ValueError("Invalid resnet size: {}".format(RESNET_SIZE))

# torchvision's ResNet keeps its pooling layer under the attribute `avgpool`;
# assigning to `avg_pool` only attached an extra module that forward() never
# calls, so the intended replacement silently did not happen.
model.avgpool = nn.AdaptiveAvgPool2d(1)
# Replace the classification head so it emits `num_classes` logits.
model.fc = nn.Linear(model.fc.in_features, num_classes)
model.cuda()

# Restore weights and training counters when resuming.
if CHECKPOINT_NAME is not None:
    model.load_state_dict(checkpoint["model_state_dict"])
    epoch = int(checkpoint["epoch"]) + 1
    global_step = int(checkpoint["global_step"])

global_start_time = time.time()

if not PREDICT_ONLY:
    print("Training...")
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
             groups=1, width_per_group=64, replace_stride_with_dilation=None,
             norm_layer=None):
    """Build the stem, the four residual stages and the classifier head.

    Args:
        block: residual block class; its ``expansion`` attribute sizes the
            final linear layer.
        layers: number of blocks in each of the four stages.
        num_classes: output size of the final linear layer.
        zero_init_residual: if true, zero the weight of the last norm layer
            of every ``Bottleneck`` / ``BasicBlock``.
        groups: stored as ``self.groups``.
        width_per_group: stored as ``self.base_width``.
        replace_stride_with_dilation: ``None`` or three flags, one per
            strided stage (layer2..layer4), forwarded as ``dilate``.
        norm_layer: normalisation layer factory; ``nn.BatchNorm2d`` if None.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have
            exactly three elements.
    """
    super(ResNet, self).__init__()

    self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
    norm_layer = self._norm_layer

    self.inplanes = 64
    self.dilation = 1

    # Each flag says whether the stage's stride of 2 should be replaced by
    # a dilated convolution.
    dilate_flags = replace_stride_with_dilation
    if dilate_flags is None:
        dilate_flags = [False, False, False]
    if len(dilate_flags) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(dilate_flags))

    self.groups = groups
    self.base_width = width_per_group

    # Stem: 7x7 stride-2 convolution, norm, ReLU, 3x3 stride-2 max-pool.
    self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2,
                           padding=3, bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages; only layer2..layer4 are strided.
    self.layer1 = self._make_layer(block, 64, layers[0])
    for attr, planes, idx in (("layer2", 128, 1),
                              ("layer3", 256, 2),
                              ("layer4", 512, 3)):
        stage = self._make_layer(block, planes, layers[idx], stride=2,
                                 dilate=dilate_flags[idx - 1])
        setattr(self, attr, stage)

    # Head.
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.flatten = Flatten()
    self.fc = nn.Linear(512 * block.expansion, num_classes)

    # Weight initialisation for convolutions and norm layers.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                    nonlinearity='relu')
            continue
        if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)

    if not zero_init_residual:
        return

    # Zero-initialise the last norm layer in each residual branch so every
    # block starts out as an identity mapping
    # (see https://arxiv.org/abs/1706.02677).
    for module in self.modules():
        if isinstance(module, Bottleneck):
            nn.init.constant_(module.bn3.weight, 0)
        elif isinstance(module, BasicBlock):
            nn.init.constant_(module.bn2.weight, 0)
def __init__(self, layers, block=None, k=1, use_relu_=False, use_bn_=True,
             init='kaiming_normal', bn_fc_mode=1, split_output=False,
             split_size=512, descriptor_size=512, pretrained=None):
    """Build the stem, four residual stages, an SE block and the pooling layer.

    Args:
        layers: number of blocks in each of the four stages.
        block: residual block class; ``BasicBlock`` when None.
        k: width multiplier applied (with ``round``) to every stage width.
        use_relu_: use ``nn.ReLU`` for the stem when true, else ``nn.PReLU``.
            Also written to the module-level global ``use_relu``.
        use_bn_: add a batch-norm after the stem convolution and drop the
            convolution bias. Also written to the global ``use_bn``.
        init, split_size, descriptor_size: accepted but not read here.
        bn_fc_mode, split_output: stored on the instance.
        pretrained: if truthy, passed to ``self._load_pretrained_weight``;
            otherwise convolutions and batch-norms are re-initialised.
    """
    # These switches are published as module globals (other code in the
    # file presumably reads them - verify before removing).
    global use_relu, use_bn
    super(ResNetCaffe, self).__init__()

    use_relu = use_relu_
    use_bn = use_bn_
    self.use_bn = use_bn
    self.split_output = split_output
    self.bn_fc_mode = bn_fc_mode

    stem_channels = round(32 * k)
    self.inplanes = stem_channels

    # Stem convolution with a uniform initialisation bounded by
    # calculate_scale(); the bias only exists when batch-norm is disabled.
    self.conv1 = nn.Conv2d(3, stem_channels, kernel_size=3, stride=1,
                           padding=0, bias=not use_bn)
    bound = calculate_scale(self.conv1.weight.data)
    torch.nn.init.uniform_(self.conv1.weight.data, -bound, bound)
    if self.conv1.bias is not None:
        self.conv1.bias.data.zero_()

    if self.use_bn:
        self.bn1 = nn.BatchNorm2d(stem_channels)
    self.relu = nn.ReLU(inplace=True) if use_relu else nn.PReLU(stem_channels)

    if block is None:
        block = BasicBlock

    # Residual stages; stages 2-4 use stride 2.
    self.layer1 = self._make_layer(block, round(64 * k), layers[0])
    for stage_no, base_width in ((2, 128), (3, 256), (4, 512)):
        stage = self._make_layer(block, round(base_width * k),
                                 layers[stage_no - 1], stride=2)
        setattr(self, 'layer{}'.format(stage_no), stage)

    # The SE block is sized for 256 channels regardless of k.
    self.se_block = SEBlock(256, 256 // 16)
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

    if pretrained:
        self._load_pretrained_weight(pretrained)
        return

    # No pretrained weights: re-initialise every convolution (including the
    # stem initialised above) and every batch-norm.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            nn.init.kaiming_normal_(module.weight, mode='fan_out',
                                    nonlinearity='relu')
        elif isinstance(module, nn.BatchNorm2d):
            nn.init.constant_(module.weight, 1)
            nn.init.constant_(module.bias, 0)
def __init__(
    self,
    num_classes,
    loss,
    block,
    layers,
    zero_init_residual=False,
    groups=1,
    width_per_group=64,
    replace_stride_with_dilation=None,
    norm_layer=None,
    last_stride=2,
    fc_dims=None,
    dropout_p=None,
    **kwargs
):
    """Build the backbone, the optional FC embedding and the classifier.

    Args:
        num_classes: output size of ``self.classifier``.
        loss: stored as ``self.loss``.
        block: residual block class; ``block.expansion`` scales the
            feature dimension.
        layers: number of blocks in each of the four stages.
        zero_init_residual: if true, zero the weight of the last norm layer
            of every ``Bottleneck`` / ``BasicBlock``.
        groups: stored as ``self.groups``.
        width_per_group: stored as ``self.base_width``.
        replace_stride_with_dilation: ``None`` or three flags, one per
            stage layer2..layer4, forwarded as ``dilate``.
        norm_layer: normalisation layer factory; ``nn.BatchNorm2d`` if None.
        last_stride: stride of layer4.
        fc_dims, dropout_p: forwarded to ``self._construct_fc_layer``.
        **kwargs: accepted and ignored.

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have
            exactly three elements.
    """
    super(ResNet, self).__init__()

    self._norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
    norm_layer = self._norm_layer

    self.loss = loss
    self.feature_dim = 512 * block.expansion
    self.inplanes = 64
    self.dilation = 1
    self.acm_drop_rate = 0.75  # default is 0.75
    self.acm_threshold = 0.8

    # Each flag says whether the stage's stride of 2 should be replaced by
    # a dilated convolution.
    dilate_flags = replace_stride_with_dilation
    if dilate_flags is None:
        dilate_flags = [False, False, False]
    if len(dilate_flags) != 3:
        raise ValueError(
            "replace_stride_with_dilation should be None "
            "or a 3-element tuple, got {}".format(dilate_flags)
        )

    self.groups = groups
    self.base_width = width_per_group

    # Stem.
    self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2,
                           padding=3, bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

    # Residual stages; layer4's stride is configurable via last_stride.
    self.layer1 = self._make_layer(block, 64, layers[0])
    stage_plan = (
        ("layer2", 128, 1, 2),
        ("layer3", 256, 2, 2),
        ("layer4", 512, 3, last_stride),
    )
    for attr, planes, idx, stride in stage_plan:
        stage = self._make_layer(block, planes, layers[idx], stride=stride,
                                 dilate=dilate_flags[idx - 1])
        setattr(self, attr, stage)

    # Head. The classifier reads self.feature_dim only after the FC layer
    # has been constructed, exactly as in the original statement order.
    self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = self._construct_fc_layer(fc_dims, 512 * block.expansion,
                                       dropout_p)
    self.classifier = nn.Linear(self.feature_dim, num_classes)

    self._init_params()

    if not zero_init_residual:
        return

    # Zero-initialise the last norm layer in each residual branch so every
    # block starts out as an identity mapping
    # (see https://arxiv.org/abs/1706.02677).
    for module in self.modules():
        if isinstance(module, Bottleneck):
            nn.init.constant_(module.bn3.weight, 0)
        elif isinstance(module, BasicBlock):
            nn.init.constant_(module.bn2.weight, 0)
def __init__(self, widen_factor=1.0, num_classes=1000, prelu=False,
             input_channel=3):
    """Build the stem, thirteen depth-wise blocks and the classifier.

    Args:
        widen_factor: multiplier applied to every channel width.
        num_classes: output size of the final linear layer.
        prelu: use ``nn.PReLU`` instead of ``nn.ReLU`` for the stem; also
            forwarded to every block.
        input_channel: number of channels of the input image.
    """
    super(MobileNet, self).__init__()
    block = DepthWiseBlock

    stem_out = int(32 * widen_factor)
    self.conv1 = nn.Conv2d(input_channel, stem_out, kernel_size=3,
                           stride=2, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(stem_out)
    self.relu = nn.PReLU() if prelu else nn.ReLU(inplace=True)

    # (attribute, base input width, base output width, downsample?)
    plan = (
        ("dw2_1", 32, 64, False),
        ("dw2_2", 64, 128, True),
        ("dw3_1", 128, 128, False),
        ("dw3_2", 128, 256, True),
        ("dw4_1", 256, 256, False),
        ("dw4_2", 256, 512, True),
        ("dw5_1", 512, 512, False),
        ("dw5_2", 512, 512, False),
        ("dw5_3", 512, 512, False),
        ("dw5_4", 512, 512, False),
        ("dw5_5", 512, 512, False),
        ("dw5_6", 512, 1024, True),
        ("dw6", 1024, 1024, False),
    )
    for attr, base_in, base_out, downsample in plan:
        width_in = base_in * widen_factor
        width_out = base_out * widen_factor
        # Non-strided blocks rely on the block's own default stride.
        if downsample:
            stage = block(width_in, width_out, stride=2, prelu=prelu)
        else:
            stage = block(width_in, width_out, prelu=prelu)
        setattr(self, attr, stage)

    self.avgpool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Linear(int(1024 * widen_factor), num_classes)

    # Convolutions: normal init with std sqrt(2 / (kh * kw * out_channels));
    # batch-norms: weight 1, bias 0.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            kernel_h, kernel_w = module.kernel_size[0], module.kernel_size[1]
            fan_out = kernel_h * kernel_w * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan_out))
        elif isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
def __init__(self, n_channels):
    """Create the pooled branch and the 3x3 convolution.

    Args:
        n_channels: channel count used for every layer; input and output
            widths are identical.
    """
    super().__init__()

    # Global average pool -> batch-norm -> ConvBnReLU(C, C, 1, 1).
    pooled_branch = [
        nn.AdaptiveAvgPool2d(1),
        nn.BatchNorm2d(n_channels),
        ConvBnReLU(n_channels, n_channels, 1, 1),
    ]
    self.block = nn.Sequential(*pooled_branch)

    # 3x3, stride-1 convolution; padding 1 keeps the spatial size.
    self.conv = nn.Conv2d(n_channels, n_channels, kernel_size=3, stride=1,
                          padding=1)
def __init__(self, transformer, basis_equiv_layers, fc_sizes, shape_input,
             sz_output, bias, pool_sz_conv, normalize_basis, stride_conv, lr,
             normalized_l2, onebyoneconv, basis_equiv_layers_type, pool_type,
             last_layer_type):
    """Assemble the network as a flat ``nn.ModuleList`` in ``self.layers``.

    Args:
        transformer: forwarded to ``BasisAE`` and ``BasisEquivConvLyer``.
        basis_equiv_layers: sequence of ``(nr_basis, nr_filters, filter_sz)``
            triples, one per convolutional layer.
        fc_sizes: output sizes of the fully connected layers appended after
            the global pooling; may be empty.
        shape_input: shape of the input; must have length 3.
        sz_output: output size of the last layer.
        bias: forwarded as ``bias`` to the convolution layers.
        pool_sz_conv: stored as ``self.pool_sz_conv`` and ``self.pool_sz``.
        normalize_basis: forwarded to ``BasisAE`` as ``normalize``.
        stride_conv: per-layer value; used as the convolution stride when
            ``pool_type == 'stride'``, and a value of 2 adds a 2x2 pooling
            layer for pool types 'avg' / 'max'.
        lr: forwarded to ``BasisAE``.
        normalized_l2: forwarded to ``BasisAE``.
        onebyoneconv: output sizes of the 1x1 convolutions; may be empty.
        basis_equiv_layers_type: ``'conv'`` selects plain ``nn.Conv2d``
            layers; any other value selects ``BasisEquivConvLyer`` and is
            forwarded to ``BasisAE`` as ``basis_type``.
        pool_type: one of ``'stride'``, ``'avg'``, ``'max'`` as tested below.
        last_layer_type: ``'conv1x1'``, ``'group1x1'`` or ``'linear'``.

    Raises:
        ValueError: if ``shape_input`` does not have length 3.
    """
    super(BasisEquivariantNet, self).__init__()
    self.last_layer_type = last_layer_type
    self.basis_equiv_layers_type = basis_equiv_layers_type
    self.pool_type = pool_type
    self.normalize_basis = normalize_basis
    self.pool_sz_conv = pool_sz_conv
    self.basis_equiv_layers = basis_equiv_layers
    # Number of layers whose filter size is not 1.
    self.len_non1_basis_equiv_layers = len(
        [layer for layer in basis_equiv_layers if layer[2] != 1])
    self.sz_output = sz_output
    self.layers = nn.ModuleList()
    self.pool_sz = pool_sz_conv
    self.stride_conv = stride_conv

    if len(shape_input) == 3:
        # NOTE(review): `type` here is the Python builtin, so this test is
        # always true; `basis_equiv_layers_type` was probably intended.
        # Left unchanged because self.shape_input would differ otherwise.
        if type != 'conv':
            # image has 1 transformation S
            # we add tuples (K, S) and (n, n) => (K, S, n, n)
            shape_input = tuple([shape_input[0], 1])
    else:
        raise ValueError("are we not sending images?")
    self.shape_input = shape_input

    # Add layers
    for idx, (nr_basis, nr_filters, filter_sz) in enumerate(basis_equiv_layers):
        # Striding happens inside the convolution only for pool_type 'stride'.
        if pool_type == 'stride':
            stride = stride_conv[idx]
        else:
            stride = 1

        if basis_equiv_layers_type != 'conv':
            # A basis auto-encoder is only built for filters larger than 1x1.
            if filter_sz != 1:
                basis_ae_layer = BasisAE(
                    in_shape=shape_input,
                    nr_basis=nr_basis,
                    transformer=transformer,
                    basis_sz=filter_sz,
                    padding=int(filter_sz / 2),
                    normalize=normalize_basis,
                    lr=lr,
                    index=idx,
                    normalized_l2=normalized_l2,
                    basis_type=basis_equiv_layers_type)
            else:
                basis_ae_layer = None
            layer = BasisEquivConvLyer(
                basis_ae=basis_ae_layer,
                transformer=transformer,
                in_shape=shape_input,
                nr_basis=nr_basis,
                nr_filters=nr_filters,
                stride=stride,  # stride_conv[idx],
                filter_sz=filter_sz,
                conv_padding=int(filter_sz / 2),
                bias=bias,
                index=idx)
            self.layers.append(layer)
            # The next layer consumes this layer's reported output shape.
            shape_input = layer.out_shape
            if stride_conv[idx] == 2:
                if self.pool_type == 'avg':
                    self.layers.append(nn.AvgPool2d((2, 2), 2))
                elif self.pool_type == 'max':
                    self.layers.append(nn.MaxPool2d((2, 2), 2))
            self.layers.append(nn.BatchNorm3d(nr_filters))
            self.layers.append(nn.ReLU())
        else:
            # Plain convolution path: only 3x3 filters without a basis.
            assert nr_basis is None
            assert filter_sz == 3
            layer = nn.Conv2d(shape_input[0],
                              nr_filters,
                              filter_sz,
                              stride=stride,
                              padding=1,
                              bias=bias)
            shape_input = (nr_filters, )
            self.layers.append(layer)
            if stride_conv[idx] == 2:
                if self.pool_type == 'avg':
                    self.layers.append(nn.AvgPool2d((2, 2), 2))
                elif self.pool_type == 'max':
                    self.layers.append(nn.MaxPool2d((2, 2), 2))
            self.layers.append(nn.BatchNorm2d(nr_filters))
            self.layers.append(nn.ReLU())

    # Optional 1x1 convolutions after the main stack.
    if len(onebyoneconv) != 0:
        if self.basis_equiv_layers_type != 'conv':
            # Pools the first of the three trailing dimensions down to 1.
            self.layers.append(nn.AdaptiveAvgPool3d((1, None, None)))
        for sz in onebyoneconv:
            # NOTE(review): Conv1d is followed by BatchNorm2d here; confirm
            # against forward() that the tensor ranks line up.
            self.layers.append(
                nn.Conv1d(in_channels=shape_input[0],
                          out_channels=sz,
                          kernel_size=1))
            # if basis_equiv_layers_type != 'conv':
            #     shape_input = (sz, shape_input[1])
            #     self.layers.append(nn.BatchNorm3d(sz))
            # else:
            shape_input = (sz, )
            self.layers.append(nn.BatchNorm2d(sz))
            self.layers.append(nn.ReLU())

    if last_layer_type == 'conv1x1':
        self.layers.append(
            nn.Conv1d(in_channels=shape_input[0],
                      out_channels=sz_output,
                      kernel_size=1))
    if last_layer_type == 'group1x1':
        layer = BasisEquivConvLyer(
            basis_ae=None,
            transformer=transformer,
            in_shape=shape_input,
            nr_basis=0,
            nr_filters=sz_output,
            stride=1,  # stride_conv[idx],
            filter_sz=1,
            conv_padding=0,
            bias=bias,
            index=len(basis_equiv_layers))
        self.layers.append(layer)

    # Global pooling: the 3-D variant is used when the equivariant layers
    # are active and no 1x1 convolutions were added.
    if self.pool_type == 'avg' or self.pool_type == 'stride':
        if self.basis_equiv_layers_type != 'conv' and len(
                onebyoneconv) == 0:
            self.layers.append(nn.AdaptiveAvgPool3d((1, 1, 1)))
        else:
            self.layers.append(nn.AdaptiveAvgPool2d((1, 1)))
    elif self.pool_type == 'max':
        if self.basis_equiv_layers_type != 'conv' and len(
                onebyoneconv) == 0:
            self.layers.append(nn.AdaptiveMaxPool3d((1, 1, 1)))
        else:
            self.layers.append(nn.AdaptiveMaxPool2d((1, 1)))

    # Optional fully connected layers.
    if len(fc_sizes) != 0:
        for sz in fc_sizes:
            self.layers.append(nn.Linear(shape_input[0], sz))
            self.layers.append(nn.BatchNorm1d(sz))
            self.layers.append(nn.ReLU())
            shape_input = (sz, )
        # self.layers.append(nn.Linear(shape_input[0], sz_output))

    if last_layer_type == 'linear':
        self.layers.append(nn.Linear(shape_input[0], sz_output))
def forward(self, x, s_x=None, s_y=None, y=None):
    """Run the few-shot segmentation forward pass.

    Args:
        x: query images; height and width must satisfy ``(size - 1) % 8 == 0``.
        s_x: support images, indexed as ``s_x[:, i]`` for shot ``i``. When
            omitted, a zero placeholder of shape ``(1, 1, 3, 473, 473)`` is
            created on ``x``'s device.
        s_y: support masks, indexed as ``s_y[:, i]``; pixels equal to 1 are
            foreground. When omitted, a zero placeholder of shape
            ``(1, 1, 473, 473)`` is created on ``x``'s device.
        y: query labels; required when the module is in training mode.

    Returns:
        In training mode, ``(prediction, main_loss, aux_loss)`` where
        ``prediction`` is the argmax of the logits over dimension 1 and
        ``aux_loss`` is the mean criterion value of the per-scale heads.
        Otherwise the logits.

    Notes:
        The placeholders used to be built in the signature with
        ``torch.FloatTensor(...).cuda()``. That ran at definition time, so
        importing the module needed a GPU, and it exposed uninitialised
        memory. They are now created lazily.
    """
    if s_x is None:
        s_x = torch.zeros(1, 1, 3, 473, 473, dtype=torch.float32, device=x.device)
    if s_y is None:
        s_y = torch.zeros(1, 1, 473, 473, dtype=torch.float32, device=x.device)

    x_size = x.size()
    assert (x_size[2]-1) % 8 == 0 and (x_size[3]-1) % 8 == 0
    h = int((x_size[2] - 1) / 8 * self.zoom_factor + 1)
    w = int((x_size[3] - 1) / 8 * self.zoom_factor + 1)

    # Generate the query feature (the backbone runs without gradients).
    with torch.no_grad():
        query_feat_0 = self.layer0(x)
        query_feat_1 = self.layer1(query_feat_0)
        query_feat_2 = self.layer2(query_feat_1)
        query_feat_3 = self.layer3(query_feat_2)
        query_feat_4 = self.layer4(query_feat_3)
        if self.vgg:
            query_feat_2 = F.interpolate(query_feat_2, size=(query_feat_3.size(2), query_feat_3.size(3)), mode='bilinear', align_corners=True)

    query_feat = torch.cat([query_feat_3, query_feat_2], 1)
    query_feat = self.down_query(query_feat)

    # Generate the support features, one per shot.
    supp_feat_list = []
    final_supp_list = []
    mask_list = []
    for i in range(self.shot):
        mask = (s_y[:, i, :, :] == 1).float().unsqueeze(1)
        mask_list.append(mask)  # full-resolution mask, reused below
        with torch.no_grad():
            supp_feat_0 = self.layer0(s_x[:, i, :, :, :])
            supp_feat_1 = self.layer1(supp_feat_0)
            supp_feat_2 = self.layer2(supp_feat_1)
            supp_feat_3 = self.layer3(supp_feat_2)
            # From here on `mask` is the mask resized to layer3 resolution.
            mask = F.interpolate(mask, size=(supp_feat_3.size(2), supp_feat_3.size(3)), mode='bilinear', align_corners=True)
            supp_feat_4 = self.layer4(supp_feat_3*mask)
            final_supp_list.append(supp_feat_4)
            if self.vgg:
                supp_feat_2 = F.interpolate(supp_feat_2, size=(supp_feat_3.size(2), supp_feat_3.size(3)), mode='bilinear', align_corners=True)

        supp_feat = torch.cat([supp_feat_3, supp_feat_2], 1)
        supp_feat = self.down_supp(supp_feat)
        supp_feat = Weighted_GAP(supp_feat, mask)
        supp_feat_list.append(supp_feat)

    # Prior mask: for each query location, the maximum cosine similarity to
    # any masked support location, min-max normalised per sample.
    corr_query_mask_list = []
    cosine_eps = 1e-7
    for i, tmp_supp_feat in enumerate(final_supp_list):
        resize_size = tmp_supp_feat.size(2)
        tmp_mask = F.interpolate(mask_list[i], size=(resize_size, resize_size), mode='bilinear', align_corners=True)

        tmp_supp_feat_4 = tmp_supp_feat * tmp_mask
        q = query_feat_4
        s = tmp_supp_feat_4
        bsize, ch_sz, sp_sz, _ = q.size()[:]

        tmp_query = q
        tmp_query = tmp_query.contiguous().view(bsize, ch_sz, -1)
        tmp_query_norm = torch.norm(tmp_query, 2, 1, True)

        tmp_supp = s
        tmp_supp = tmp_supp.contiguous().view(bsize, ch_sz, -1)
        tmp_supp = tmp_supp.contiguous().permute(0, 2, 1)
        tmp_supp_norm = torch.norm(tmp_supp, 2, 2, True)

        similarity = torch.bmm(tmp_supp, tmp_query)/(torch.bmm(tmp_supp_norm, tmp_query_norm) + cosine_eps)
        similarity = similarity.max(1)[0].view(bsize, sp_sz*sp_sz)
        similarity = (similarity - similarity.min(1)[0].unsqueeze(1))/(similarity.max(1)[0].unsqueeze(1) - similarity.min(1)[0].unsqueeze(1) + cosine_eps)
        corr_query = similarity.view(bsize, 1, sp_sz, sp_sz)
        corr_query = F.interpolate(corr_query, size=(query_feat_3.size()[2], query_feat_3.size()[3]), mode='bilinear', align_corners=True)
        corr_query_mask_list.append(corr_query)
    corr_query_mask = torch.cat(corr_query_mask_list, 1).mean(1).unsqueeze(1)
    corr_query_mask = F.interpolate(corr_query_mask, size=(query_feat.size(2), query_feat.size(3)), mode='bilinear', align_corners=True)

    # With several shots, average the support prototypes (in place on the
    # first one, as before).
    if self.shot > 1:
        supp_feat = supp_feat_list[0]
        for extra_feat in supp_feat_list[1:]:
            supp_feat += extra_feat
        supp_feat /= len(supp_feat_list)

    out_list = []
    pyramid_feat_list = []

    for idx, tmp_bin in enumerate(self.pyramid_bins):
        # Values <= 1.0 are fractions of the feature-map height; larger
        # values are absolute output sizes.
        if tmp_bin <= 1.0:
            bin_size = int(query_feat.shape[2] * tmp_bin)
        else:
            bin_size = tmp_bin
        # Adaptive average pooling has no parameters, so the functional
        # form is equivalent to the prebuilt module. It also avoids
        # indexing self.avgpool_list, which only holds entries for bins > 1
        # and is therefore not aligned with `idx` for mixed settings.
        query_feat_bin = F.adaptive_avg_pool2d(query_feat, bin_size)
        supp_feat_bin = supp_feat.expand(-1, -1, bin_size, bin_size)
        corr_mask_bin = F.interpolate(corr_query_mask, size=(bin_size, bin_size), mode='bilinear', align_corners=True)
        merge_feat_bin = torch.cat([query_feat_bin, supp_feat_bin, corr_mask_bin], 1)
        merge_feat_bin = self.init_merge[idx](merge_feat_bin)

        # Fuse with the previous scale's feature.
        if idx >= 1:
            pre_feat_bin = pyramid_feat_list[idx-1].clone()
            pre_feat_bin = F.interpolate(pre_feat_bin, size=(bin_size, bin_size), mode='bilinear', align_corners=True)
            rec_feat_bin = torch.cat([merge_feat_bin, pre_feat_bin], 1)
            merge_feat_bin = self.alpha_conv[idx-1](rec_feat_bin) + merge_feat_bin

        merge_feat_bin = self.beta_conv[idx](merge_feat_bin) + merge_feat_bin
        inner_out_bin = self.inner_cls[idx](merge_feat_bin)
        merge_feat_bin = F.interpolate(merge_feat_bin, size=(query_feat.size(2), query_feat.size(3)), mode='bilinear', align_corners=True)
        pyramid_feat_list.append(merge_feat_bin)
        out_list.append(inner_out_bin)

    query_feat = torch.cat(pyramid_feat_list, 1)
    query_feat = self.res1(query_feat)
    query_feat = self.res2(query_feat) + query_feat
    out = self.cls(query_feat)

    # Output part
    if self.zoom_factor != 1:
        out = F.interpolate(out, size=(h, w), mode='bilinear', align_corners=True)

    if self.training:
        main_loss = self.criterion(out, y.long())
        # zeros_like already matches main_loss's device and dtype.
        aux_loss = torch.zeros_like(main_loss)
        for inner_out in out_list:
            inner_out = F.interpolate(inner_out, size=(h, w), mode='bilinear', align_corners=True)
            aux_loss = aux_loss + self.criterion(inner_out, y.long())
        aux_loss = aux_loss / len(out_list)
        return out.max(1)[1], main_loss, aux_loss
    else:
        return out
def __init__(self, layers=50, classes=2, zoom_factor=8,
             criterion=nn.CrossEntropyLoss(ignore_index=255), BatchNorm=nn.BatchNorm2d,
             pretrained=True, sync_bn=True, shot=1, ppm_scales=None, vgg=False):
    """Build the backbone and the multi-scale enrichment heads.

    Args:
        layers: ResNet depth; one of 50, 101, 152 (ignored when ``vgg``).
        classes: number of output classes; must be greater than 1.
        zoom_factor: stored and used by ``forward`` to size the output.
        criterion: loss module stored as ``self.criterion``.
        BatchNorm: accepted for compatibility; the body rebinds the name to
            ``torch.nn.BatchNorm2d`` before use, so the argument is ignored.
        pretrained: forwarded to the backbone constructor.
        sync_bn: accepted but not read here.
        shot: number of support samples per episode.
        ppm_scales: pyramid bin sizes; ``[60, 30, 15, 8]`` when None. Values
            > 1 are absolute sizes, others are handled as fractions in
            ``forward``.
        vgg: use the VGG-16 (batch-norm) backbone instead of a ResNet.
    """
    super(PFENet, self).__init__()
    # A list literal as default would be shared between all instances.
    if ppm_scales is None:
        ppm_scales = [60, 30, 15, 8]
    assert layers in [50, 101, 152]
    print(ppm_scales)
    assert classes > 1
    from torch.nn import BatchNorm2d as BatchNorm
    self.zoom_factor = zoom_factor
    self.criterion = criterion
    self.shot = shot
    self.ppm_scales = ppm_scales
    self.vgg = vgg

    # The backbone modules read their norm layer from a module attribute.
    models.BatchNorm = BatchNorm

    if self.vgg:
        print('INFO: Using VGG_16 bn')
        vgg_models.BatchNorm = BatchNorm
        vgg16 = vgg_models.vgg16_bn(pretrained=pretrained)
        print(vgg16)
        self.layer0, self.layer1, self.layer2, \
            self.layer3, self.layer4 = get_vgg16_layer(vgg16)
    else:
        print('INFO: Using ResNet {}'.format(layers))
        if layers == 50:
            resnet = models.resnet50(pretrained=pretrained)
        elif layers == 101:
            resnet = models.resnet101(pretrained=pretrained)
        else:
            resnet = models.resnet152(pretrained=pretrained)
        self.layer0 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu1,
                                    resnet.conv2, resnet.bn2, resnet.relu2,
                                    resnet.conv3, resnet.bn3, resnet.relu3,
                                    resnet.maxpool)
        self.layer1, self.layer2, self.layer3, self.layer4 = \
            resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4

        # Replace the stride of layer3 / layer4 by dilation (2 and 4).
        for n, m in self.layer3.named_modules():
            if 'conv2' in n:
                m.dilation, m.padding, m.stride = (2, 2), (2, 2), (1, 1)
            elif 'downsample.0' in n:
                m.stride = (1, 1)
        for n, m in self.layer4.named_modules():
            if 'conv2' in n:
                m.dilation, m.padding, m.stride = (4, 4), (4, 4), (1, 1)
            elif 'downsample.0' in n:
                m.stride = (1, 1)

    reduce_dim = 256
    # Channel count of the concatenated layer3 + layer2 features.
    if self.vgg:
        fea_dim = 512 + 256
    else:
        fea_dim = 1024 + 512

    self.cls = nn.Sequential(
        nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.1),
        nn.Conv2d(reduce_dim, classes, kernel_size=1)
    )

    self.down_query = nn.Sequential(
        nn.Conv2d(fea_dim, reduce_dim, kernel_size=1, padding=0, bias=False),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5)
    )
    self.down_supp = nn.Sequential(
        nn.Conv2d(fea_dim, reduce_dim, kernel_size=1, padding=0, bias=False),
        nn.ReLU(inplace=True),
        nn.Dropout2d(p=0.5)
    )

    self.pyramid_bins = ppm_scales
    # One entry per pyramid bin so the list can be indexed with the bin's
    # position. Fractional bins (<= 1) are pooled on the fly in forward()
    # and get a None placeholder; previously they were skipped, which
    # shifted every later index.
    self.avgpool_list = [
        nn.AdaptiveAvgPool2d(bin_size) if bin_size > 1 else None
        for bin_size in self.pyramid_bins
    ]

    mask_add_num = 1  # the prior mask adds one input channel
    self.init_merge = []
    self.beta_conv = []
    self.inner_cls = []
    # The three heads are created interleaved, one scale at a time.
    for _ in self.pyramid_bins:
        self.init_merge.append(nn.Sequential(
            nn.Conv2d(reduce_dim*2 + mask_add_num, reduce_dim, kernel_size=1, padding=0, bias=False),
            nn.ReLU(inplace=True),
        ))
        self.beta_conv.append(nn.Sequential(
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True)
        ))
        self.inner_cls.append(nn.Sequential(
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.1),
            nn.Conv2d(reduce_dim, classes, kernel_size=1)
        ))
    self.init_merge = nn.ModuleList(self.init_merge)
    self.beta_conv = nn.ModuleList(self.beta_conv)
    self.inner_cls = nn.ModuleList(self.inner_cls)

    self.res1 = nn.Sequential(
        nn.Conv2d(reduce_dim*len(self.pyramid_bins), reduce_dim, kernel_size=1, padding=0, bias=False),
        nn.ReLU(inplace=True),
    )
    self.res2 = nn.Sequential(
        nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
        nn.ReLU(inplace=True),
        nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
        nn.ReLU(inplace=True),
    )

    self.GAP = nn.AdaptiveAvgPool2d(1)

    # One fusion convolution per pair of neighbouring scales; its input is
    # the concatenation of two reduce_dim-wide features.
    self.alpha_conv = []
    for _ in range(len(self.pyramid_bins)-1):
        self.alpha_conv.append(nn.Sequential(
            nn.Conv2d(reduce_dim*2, reduce_dim, kernel_size=1, stride=1, padding=0, bias=False),
            nn.ReLU()
        ))
    self.alpha_conv = nn.ModuleList(self.alpha_conv)
def __init__(self, block=Bottleneck, layers=[3, 5, 11, 7], class_num=4, label_num=5, dropout=0.7, mode='multi', vocab_size=0):
    """Create the fast and slow 3-D pathways, lateral connections and heads.

    Args:
        block: residual block class passed to the ``_make_layer_*`` helpers.
        layers: number of blocks in each of the four residual stages.
            NOTE(review): the default is a list literal shared between
            calls; it is only indexed here, never mutated.
        class_num: output size of every linear layer in ``classifier1``.
        label_num: number of linear layers in ``classifier1``.
        dropout: probability for ``self.dp``.
        mode: stored as ``self.mode``.
        vocab_size: not read by the active code in this method.
    """
    super(SlowFast, self).__init__()
    self.mode = mode
    self.embed_dim = 64
    self.audio_size = 1024
    self.class_num = class_num
    self.label_num = label_num

    # Fast pathway: narrow channels (8 -> 64 planes), temporal kernel 5 in
    # the stem.
    self.fast_inplanes = 8
    self.fast_conv1 = nn.Conv3d(3, 8, kernel_size=(5, 7, 7), stride=(1, 2, 2), padding=(2, 3, 3), bias=False)
    self.fast_bn1 = nn.BatchNorm3d(8)
    self.fast_relu = nn.ReLU(inplace=True)
    self.fast_maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1))
    self.fast_res2 = self._make_layer_fast(block, 8, layers[0], head_conv=5)
    self.fast_res3 = self._make_layer_fast(block, 16, layers[1], stride=2, head_conv=5)
    self.fast_res4 = self._make_layer_fast(block, 32, layers[2], stride=2, head_conv=3)
    self.fast_res5 = self._make_layer_fast(block, 64, layers[3], stride=2, head_conv=3)

    # Lateral convolutions: temporal kernel 5 with temporal stride 8,
    # doubling the channel count.
    self.lateral_p1 = nn.Conv3d(8, 8 * 2, kernel_size=(5, 1, 1), stride=(8, 1, 1), bias=False, padding=(2, 0, 0))
    self.lateral_res2 = nn.Conv3d(32, 32 * 2, kernel_size=(5, 1, 1), stride=(8, 1, 1), bias=False, padding=(2, 0, 0))
    self.lateral_res3 = nn.Conv3d(64, 64 * 2, kernel_size=(5, 1, 1), stride=(8, 1, 1), bias=False, padding=(2, 0, 0))
    self.lateral_res4 = nn.Conv3d(128, 128 * 2, kernel_size=(5, 1, 1), stride=(8, 1, 1), bias=False, padding=(2, 0, 0))

    # Slow pathway: its input width is the stem's 64 channels plus the 16
    # channels produced by lateral_p1.
    self.slow_inplanes = 64 + 64 // 8 * 2
    self.slow_conv1 = nn.Conv3d(3, 64, kernel_size=(1, 7, 7), stride=(1, 2, 2), padding=(0, 3, 3), bias=False)
    self.slow_bn1 = nn.BatchNorm3d(64)
    self.slow_relu = nn.ReLU(inplace=True)
    self.slow_maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding=(0, 1, 1))
    self.slow_res2 = self._make_layer_slow(block, 64, layers[0], head_conv=3)
    self.slow_res3 = self._make_layer_slow(block, 128, layers[1], stride=2, head_conv=3)
    self.slow_res4 = self._make_layer_slow(block, 256, layers[2], stride=2, head_conv=3)
    self.slow_res5 = self._make_layer_slow(block, 512, layers[3], stride=2, head_conv=3)

    self.dp = nn.Dropout(dropout)
    # One linear head per label over the concatenated
    # (128 * 32 + embed_dim + audio_size)-wide feature.
    self.classifier1 = nn.ModuleList([
        nn.Linear(128 * 32 + self.embed_dim + self.audio_size, class_num)
        for _ in range(label_num)
    ])
    #self.classifier2 = nn.ModuleList([nn.Linear(256, class_num) for _ in range(label_num)])
    #self.classifier = nn.Linear(self.fast_inplanes + 2048 + self.embed_dim + self.audio_size, class_num * label_num)

    # text
    # self.embedding = nn.EmbeddingBag(vocab_size, self.embed_dim)
    #self.textfc1 = nn.Linear(self.embed_dim, self.embed_dim)
    #self.textfc2 = nn.Linear(self.embed_dim, self.embed_dim)
    # NOTE(review): the text layers above are commented out while this
    # initialiser is still called - confirm it does not touch them.
    self.text_init_weights()

    # genre fc, age fc
    # self.genrefc = nn.Linear(128 * 32 + self.embed_dim + self.audio_size, 9)
    self.agefc = nn.Linear(128 * 32 + self.embed_dim + self.audio_size, 4)

    # Audio feature extractor: five convolutions over a single-channel
    # input, ending in a 2x2 adaptive average pool with 256 channels.
    self.extract_audio = nn.Sequential(nn.Conv2d(1, 32, kernel_size=(3,5), stride=(1,2), padding=(1,0)),\
        nn.BatchNorm2d(32),
        nn.LeakyReLU(),
        nn.MaxPool2d((1,2)),
        nn.Conv2d(32, 64, kernel_size=(3,5), stride=(1,2), padding=(1,1)),
        nn.BatchNorm2d(64),
        nn.LeakyReLU(),
        nn.MaxPool2d((1,2)),
        nn.Conv2d(64, 128, kernel_size=(3,5), stride=(1,2), padding=(1,1)),
        nn.BatchNorm2d(128),
        nn.LeakyReLU(),
        nn.MaxPool2d((1,2)),
        nn.Conv2d(128, 256, kernel_size=(3,5), stride=(1,2), padding=(0,1)),
        nn.BatchNorm2d(256),
        nn.LeakyReLU(),
        nn.MaxPool2d(2),
        nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=(0,1)),
        nn.AdaptiveAvgPool2d(2)
    )

    # Two-layer bidirectional LSTM over (fast_inplanes + 2048)-wide inputs;
    # expects sequence-first input (batch_first=False).
    self.lstm = nn.LSTM(self.fast_inplanes + 2048, hidden_size=128, num_layers=2, batch_first=False, bidirectional=True, dropout=0.2)
def __init__(self, n_classes, use_instance_seg, use_coords,
             pixel_embedding_dim=16):
    """Build the CNN encoder, two ReNet layers, the decoder and output heads.

    Args:
        n_classes: number of channels of the semantic segmentation output.
        use_instance_seg: when true, also build the pixel-embedding head and
            the instance-count branch.
        use_coords: forwarded to ``BaseCNN``.
        pixel_embedding_dim: channels of the instance embedding output.
    """
    super(Architecture, self).__init__()

    self.n_classes = n_classes
    self.use_instance_seg = use_instance_seg
    self.use_coords = use_coords

    renet_units = 100
    renet_out = renet_units * 2  # channel count leaving each ReNet layer

    # Encoder.
    self.cnn = BaseCNN(use_coords=self.use_coords)
    self.renet1 = ReNet(n_input=256, n_units=renet_units)
    self.renet2 = ReNet(n_input=renet_out, n_units=renet_units)

    # Decoder: two 2x transposed convolutions; the second one also takes
    # the encoder features with n_filters[1] channels.
    self.upsampling1 = nn.ConvTranspose2d(in_channels=renet_out,
                                          out_channels=renet_units,
                                          kernel_size=(2, 2),
                                          stride=(2, 2))
    self.relu1 = nn.ReLU()
    self.upsampling2 = nn.ConvTranspose2d(
        in_channels=renet_units + self.cnn.n_filters[1],
        out_channels=renet_units,
        kernel_size=(2, 2),
        stride=(2, 2))
    self.relu2 = nn.ReLU()

    # Semantic segmentation head (1x1 convolution).
    self.sem_seg_output = nn.Conv2d(
        in_channels=renet_units + self.cnn.n_filters[0],
        out_channels=self.n_classes,
        kernel_size=(1, 1),
        stride=(1, 1))

    if not self.use_instance_seg:
        return

    # Instance embedding head (1x1 convolution).
    self.ins_seg_output = nn.Conv2d(
        in_channels=renet_units + self.cnn.n_filters[0],
        out_channels=pixel_embedding_dim,
        kernel_size=(1, 1),
        stride=(1, 1))

    def conv3x3(in_channels):
        """Return a 3x3, stride-1, padding-1 convolution with 64 filters."""
        return nn.Conv2d(in_channels=in_channels, out_channels=64,
                         kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

    # Instance-count CNN; layers are registered under explicit names.
    count_cnn_layers = (
        ("pool1", nn.MaxPool2d(2, stride=2)),
        ("conv1", conv3x3(renet_out)),
        ("relu1", nn.ReLU()),
        ("conv2", conv3x3(64)),
        ("relu2", nn.ReLU()),
        ("pool2", nn.MaxPool2d(kernel_size=2, stride=2)),
        ("conv3", conv3x3(64)),
        ("relu3", nn.ReLU()),
        ("conv4", conv3x3(64)),
        ("relu4", nn.ReLU()),
        ("pool3", nn.AdaptiveAvgPool2d((1, 1))),  # b, nf, 1, 1
    )
    self.ins_cls_cnn = nn.Sequential()
    for layer_name, layer in count_cnn_layers:
        self.ins_cls_cnn.add_module(layer_name, layer)

    # Instance-count regressor: a single sigmoid unit.
    self.ins_cls_out = nn.Sequential()
    for layer_name, layer in (("linear", nn.Linear(64, 1)),
                              ("sigmoid", nn.Sigmoid())):
        self.ins_cls_out.add_module(layer_name, layer)
def __init__(self, dropout_rate):
    """Instantiate all model layers.

    The network is four convolution blocks separated by three transition
    blocks (2x2 max-pool followed by a 1x1 convolution down to 32 channels),
    then a global average pool and a 10-way linear layer.

    Args:
        dropout_rate: probability used by every ``nn.Dropout`` layer.
    """
    super(Net, self).__init__()

    def act_norm_drop(channels):
        """Return the ReLU -> BatchNorm -> Dropout tail after a convolution."""
        return [nn.ReLU(), nn.BatchNorm2d(channels), nn.Dropout(dropout_rate)]

    def conv3x3(c_in, c_out, **extra):
        """Return a 3x3 convolution with padding 1."""
        return nn.Conv2d(in_channels=c_in, out_channels=c_out,
                         kernel_size=3, padding=1, **extra)

    def transition(c_in, c_out):
        """Return a 2x2 max-pool followed by a 1x1 channel reduction."""
        return nn.Sequential(
            nn.MaxPool2d(2, 2),
            nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=1))

    # The spatial sizes quoted below assume 32x32 inputs, as the original
    # annotations did.

    # Block 1: 3 -> 32 -> 64 channels at full resolution.
    self.convblock1 = nn.Sequential(
        conv3x3(3, 32), *act_norm_drop(32),
        conv3x3(32, 64), *act_norm_drop(64))
    self.transblock1 = transition(64, 32)  # 32x32 -> 16x16

    # Block 2: 32 -> 32 -> 64 channels.
    self.convblock2 = nn.Sequential(
        conv3x3(32, 32), *act_norm_drop(32),
        conv3x3(32, 64), *act_norm_drop(64))
    self.transblock2 = transition(64, 32)  # 16x16 -> 8x8

    # Block 3: the second stage is a depthwise separable convolution
    # (3x3 with groups=32, then a 1x1 pointwise convolution).
    self.convblock3 = nn.Sequential(
        conv3x3(32, 32), *act_norm_drop(32),
        conv3x3(32, 32, groups=32),
        nn.Conv2d(in_channels=32, out_channels=64, kernel_size=1),
        *act_norm_drop(64))
    self.transblock3 = transition(64, 32)  # 8x8 -> 4x4

    # Block 4: the second convolution is dilated. With padding=1 and
    # dilation=2 a 3x3 kernel shrinks each spatial dimension by 2, so the
    # output is smaller than the input (not 4x4 as previously annotated).
    self.convblock4 = nn.Sequential(
        conv3x3(32, 32), *act_norm_drop(32),
        conv3x3(32, 64, dilation=2), *act_norm_drop(64))

    # Head: global average pool to 1x1x64, then 10 outputs.
    self.gap = nn.Sequential(nn.AdaptiveAvgPool2d(1))
    self.fc = nn.Sequential(nn.Linear(64, 10))
def __init__(
    self,
    block_params: BlockParams,
    num_classes: int = 1000,
    stem_width: int = 32,
    stem_type: Optional[Callable[..., nn.Module]] = None,
    block_type: Optional[Callable[..., nn.Module]] = None,
    norm_layer: Optional[Callable[..., nn.Module]] = None,
    activation: Optional[Callable[..., nn.Module]] = None,
) -> None:
    """Build the stem, the stage trunk and the classifier.

    Args:
        block_params: stage description; ``_get_expanded_params()`` yields
            one ``(width_out, stride, depth, group_width,
            bottleneck_multiplier)`` tuple per stage and ``se_ratio`` is
            forwarded to every stage.
        num_classes: output size of the final linear layer.
        stem_width: output width of the stem.
        stem_type: stem factory; ``SimpleStemIN`` when None.
        block_type: block factory; ``ResBottleneckBlock`` when None.
        norm_layer: normalisation factory; ``nn.BatchNorm2d`` when None.
        activation: activation factory; ``nn.ReLU`` when None.
    """
    super().__init__()
    _log_api_usage_once(self)

    # Resolve defaults for the pluggable components.
    stem_type = SimpleStemIN if stem_type is None else stem_type
    norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
    block_type = ResBottleneckBlock if block_type is None else block_type
    activation = nn.ReLU if activation is None else activation

    # Ad hoc stem; the first argument is the input width (3 channels).
    self.stem = stem_type(3, stem_width, norm_layer, activation)

    # Chain the stages: each one consumes the previous stage's width.
    stages = OrderedDict()
    width_in = stem_width
    for stage_no, stage_params in enumerate(
            block_params._get_expanded_params(), start=1):
        (width_out, stride, depth, group_width,
         bottleneck_multiplier) = stage_params
        stages[f"block{stage_no}"] = AnyStage(
            width_in,
            width_out,
            stride,
            depth,
            block_type,
            norm_layer,
            activation,
            group_width,
            bottleneck_multiplier,
            block_params.se_ratio,
            stage_index=stage_no,
        )
        width_in = width_out

    self.trunk_output = nn.Sequential(stages)

    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.fc = nn.Linear(in_features=width_in, out_features=num_classes)

    # Init weights and good to go
    self._reset_parameters()
def _prepare_base_model(self, base_model, config=None):
    """Instantiate the backbone and record its input normalisation.

    Sets ``self.base_model``, ``self.input_size``, ``self.input_mean`` and
    ``self.input_std``, and attaches the optional temporal modules selected
    by ``self.is_shift``, ``self.tin`` and ``self.non_local``.

    Args:
        base_model: a torchvision model name starting with ``'resnet'``, or
            ``'BNInception'``.
        config: accepted for interface compatibility; not read here. The
            default used to be a shared ``{}`` literal.

    Raises:
        ValueError: if ``base_model`` is not one of the supported names.
            Previously this case fell through silently and left the
            instance without a ``base_model``.
    """
    print('=> base model: {}'.format(base_model))

    if base_model.startswith('resnet'):
        # The positional True is torchvision's `pretrained` flag.
        self.base_model = getattr(torchvision.models, base_model)(True)
        if self.is_shift:
            print('Adding temporal shift...')
            from ops.temporal_shift import make_temporal_shift
            make_temporal_shift(self.base_model,
                                self.num_segments,
                                n_div=self.shift_div,
                                place=self.shift_place,
                                temporal_pool=self.temporal_pool,
                                two_path=True)
        if self.tin:
            print('Adding temporal deformable conv...')
            from ops.temporal_interlace import make_temporal_interlace
            make_temporal_interlace(self.base_model,
                                    self.num_segments,
                                    shift_div=self.shift_div)
        if self.non_local:
            print('Adding non-local module...')
            from ops.non_local import make_non_local
            make_non_local(self.base_model, self.num_segments)

        self.base_model.last_layer_name = 'fc'
        self.input_size = 224
        self.input_mean = [0.485, 0.456, 0.406]
        self.input_std = [0.229, 0.224, 0.225]

        self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

        # Non-RGB modalities use their own normalisation statistics.
        if self.modality == 'Flow':
            self.input_mean = [0.5]
            self.input_std = [np.mean(self.input_std)]
        elif self.modality == 'RGBDiff':
            self.input_mean = ([0.485, 0.456, 0.406]
                               + [0] * 3 * self.new_length)
            self.input_std = (self.input_std
                              + [np.mean(self.input_std) * 2] * 3 * self.new_length)

    elif base_model == 'BNInception':
        from archs.bn_inception import bninception
        self.base_model = bninception(pretrained=self.pretrain)
        # This backbone carries its own input statistics.
        self.input_size = self.base_model.input_size
        self.input_mean = self.base_model.mean
        self.input_std = self.base_model.std
        self.base_model.last_layer_name = 'fc'
        if self.modality == 'Flow':
            self.input_mean = [128]
        elif self.modality == 'RGBDiff':
            self.input_mean = self.input_mean * (1 + self.new_length)
        if self.is_shift:
            print('Adding temporal shift...')
            self.base_model.build_temporal_ops(
                self.num_segments,
                is_temporal_shift=self.shift_place,
                shift_div=self.shift_div)

    else:
        raise ValueError('Unknown base model: {}'.format(base_model))
def train():
    """Train a ResNet-50 attribute regressor, then predict and save labels.

    Steps:
      1. Build ``resnet50`` with an ``out_dim``-wide linear head.
      2. Load the ``.npy`` / ``.pkl`` data and hold out 20% for validation.
      3. Run ``EPOCH`` epochs of Adam + MSE training, then validation.
      4. Predict attribute vectors for the test set and map each one to
         the class whose attribute row is nearest in Euclidean distance.
      5. Write the submission file and the model's ``state_dict``.

    Relies on module-level names (``out_dim``, ``LR``, ``BATCH_SIZE``,
    ``EPOCH``, ``train_dir``, ``test_dir``) and on fixed relative paths.
    """
    model = resnet50(pretrained=False)
    model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    model.fc = nn.Linear(model.fc.in_features, out_dim)
    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    lossFunc = nn.MSELoss()  # the target is not one-hotted

    # Load the training and test sets.
    train_x = np.load('./data/train_data.npy')
    train_y = pd.read_pickle('./data/train_y.pkl').values
    test_x = np.load('./data/test_data.npy')

    # Split the training data into training and validation subsets.
    train_x, val_x, train_y, val_y = train_test_split(train_x,
                                                      train_y,
                                                      test_size=0.2,
                                                      random_state=2018)
    print('划分后 train size is {}, test_size is {}'.format(
        train_x.shape, val_x.shape))

    # Convert to tensors.
    tensor_train_x = torch.from_numpy(train_x).float()
    tensor_train_y = torch.from_numpy(train_y).float()
    tensor_val_x = torch.from_numpy(val_x).float()
    tensor_val_y = torch.from_numpy(val_y).float()
    tensor_test_x = torch.from_numpy(test_x).float()

    train_data = Data.TensorDataset(tensor_train_x, tensor_train_y)
    val_data = Data.TensorDataset(tensor_val_x, tensor_val_y)
    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=BATCH_SIZE,
                                   shuffle=True,
                                   num_workers=15)
    val_loader = Data.DataLoader(dataset=val_data,
                                 batch_size=BATCH_SIZE,
                                 shuffle=True,
                                 num_workers=15)

    # Table of labels and their attribute vectors.
    label_data = pd.read_csv(osp.join(train_dir, 'attributes_per_class.txt'),
                             sep='\t',
                             header=None)

    def get_label(predict, data):
        """Return, per predicted row, the label with the nearest attributes.

        Column 0 of ``data`` holds label names; the remaining columns are
        compared against each row of ``predict``.
        """
        print(data.columns)
        label_index = np.zeros((predict.shape[0], ))
        label_name = data[0].values.tolist()
        data30 = data.iloc[:, 1:].values
        print(data30.shape)
        for i in range(predict.shape[0]):
            min_dist = 1000000.
            for j in range(len(label_name)):
                # Compare Euclidean distances.
                cur_dist = np.sqrt(np.sum(np.square(data30[j] - predict[i])))
                if min_dist > cur_dist:
                    print('min_dist:', min_dist, ' |cur_dist: ', cur_dist,
                          ' |label name: ', label_name[j])
                    min_dist = cur_dist
                    label_index[i] = j
        predict_label = [label_name[int(i)] for i in label_index]
        return np.array(predict_label)

    for epoch in range(EPOCH):
        train_loss = 0.

        # model train
        # Switch back to training mode every epoch: validation below puts
        # the model in eval mode, which would otherwise persist.
        model.train()
        print('Training...')
        for step, (batch_x, batch_y) in enumerate(train_loader):
            predict = model(batch_x)
            loss = lossFunc(predict, batch_y)
            # ``.item()`` extracts the Python float from the scalar loss;
            # indexing ``loss.data[0]`` fails on 0-dim tensors.
            train_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # model eval
        model.eval()
        eval_loss = 0.
        print('Validation....')
        # No gradients are needed for validation.
        with torch.no_grad():
            for step, (batch_x, batch_y) in enumerate(val_loader):
                predict = model(batch_x)
                loss = lossFunc(predict, batch_y)
                eval_loss += loss.item()

        print('Epoch: ', epoch, '|Train Loss: ', train_loss, '|Val Loss: ',
              eval_loss)
        print('-' * 10)

    print('Testing....')
    model.eval()
    with torch.no_grad():
        predict = model(tensor_test_x)
    predict_label = get_label(predict.detach().cpu().numpy(), label_data)
    print(predict_label.shape)

    # submit
    result = pd.read_csv(osp.join(test_dir, 'image.txt'),
                         sep='\t',
                         header=None)
    result['label'] = predict_label
    result.to_csv('resnet18_submit.txt', header=None, index=False, sep='\t')
    torch.save(model.state_dict(), './resnet18_params.pkl')
def __init__(self, num_in, num_class):
    """Create a global-average-pool layer and a linear layer.

    Args:
        num_in: Input feature count of the linear layer.
        num_class: Output feature count of the linear layer.
    """
    super(GlobalpoolFC, self).__init__()
    # Output size 1 makes the adaptive pool reduce each map to 1x1.
    self.pool = nn.AdaptiveAvgPool2d(1)
    self.fc = nn.Linear(in_features=num_in, out_features=num_class)
def __init__(self, block, layers, num_classes=1000, zero_init_residual=False, groups=1, width_per_group=64, replace_stride_with_dilation=None, norm_layer=None):
    """Build the RGB ResNet trunk, the SE attention layers and the depth stack.

    Three groups of sub-modules are registered:

    * an RGB branch (``conv1`` .. ``layer4``, ``avgpool``) built from
      ``block`` via ``self._make_layer``;
    * ``SELayer`` attention modules (``att_rgb*`` / ``att_d*``);
    * a second stack whose first convolution takes a single input
      channel, with layer names following an Inception-style scheme
      (``conv1_7x7_s2`` .. ``inception_6e_pool``).

    No classifier layer is created here.

    Args:
        block: Residual block class passed to ``self._make_layer``.
        layers: Four block counts, one each for ``layer1`` .. ``layer4``.
        num_classes: Not referenced in this constructor.
        zero_init_residual: If true, zero the weight of the last norm
            layer of every ``Bottleneck`` / ``BasicBlock``.
        groups: Stored as ``self.groups``.
        width_per_group: Stored as ``self.base_width``.
        replace_stride_with_dilation: ``None`` or a 3-element sequence of
            flags forwarded as ``dilate`` to ``layer2`` .. ``layer4``.
        norm_layer: Normalisation factory for the RGB branch; defaults to
            ``nn.BatchNorm2d``.

    Raises:
        ValueError: If ``replace_stride_with_dilation`` is given and does
            not have exactly three elements.
    """
    super(ResNet, self).__init__()
    # Shared ``inplace`` flag for the ReLUs of the depth stack below.
    inplace = True
    if norm_layer is None:
        norm_layer = nn.BatchNorm2d
    self._norm_layer = norm_layer

    # Running state; presumably read/updated by ``_make_layer`` -- confirm.
    self.inplanes = 64
    self.dilation = 1
    if replace_stride_with_dilation is None:
        # each element in the tuple indicates if we should replace
        # the 2x2 stride with a dilated convolution instead
        # -----------------------------
        # modified
        replace_stride_with_dilation = [False, False, False]
        # -----------------------------
    if len(replace_stride_with_dilation) != 3:
        raise ValueError("replace_stride_with_dilation should be None "
                         "or a 3-element tuple, got {}".format(
                             replace_stride_with_dilation))
    self.groups = groups
    self.base_width = width_per_group

    # Layers for the RGB input (3-channel stem plus four residual stages).
    self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
    self.bn1 = norm_layer(self.inplanes)
    self.relu = nn.ReLU(inplace=True)
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2])
    self.avgpool = nn.AdaptiveAvgPool2d(1)

    # Layers for merging: SE attention modules. The ``att_d_*`` variants
    # exist only for the 1024- and 2048-channel widths.
    self.att_rgb = SELayer(64)
    self.att_rgb_layer1 = SELayer(256)
    self.att_rgb_layer2 = SELayer(512)
    self.att_d_layer3 = SELayer(1024)
    self.att_rgb_layer3 = SELayer(1024)
    self.att_d_layer4 = SELayer(2048)
    self.att_rgb_layer4 = SELayer(2048)

    # Layers for the depth branch (first convolution has 1 input channel).
    inplace = True
    self.conv1_7x7_s2 = nn.Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    self.conv1_7x7_s2_bn = nn.BatchNorm2d(64, affine=True)
    self.conv1_relu_7x7 = nn.ReLU(inplace)
    self.pool1_3x3_s2 = nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=True)
    self.conv2_3x3_reduce = nn.Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    self.conv2_3x3_reduce_bn = nn.BatchNorm2d(64, affine=True)
    self.conv2_relu_3x3_reduce = nn.ReLU(inplace)
    self.conv2_3x3 = nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.conv2_3x3_bn = nn.BatchNorm2d(128, affine=True)
    self.conv2_relu_3x3 = nn.ReLU(inplace)
    self.pool2_3x3_s2 = nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=True)

    # Group "3a": convolutions take 128 input channels, branches emit 64.
    self.inception_3a_1x1 = nn.Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
    self.inception_3a_1x1_bn = nn.BatchNorm2d(64, affine=True)
    self.inception_3a_relu_1x1 = nn.ReLU(inplace)
    self.inception_3a_3x3_reduce = nn.Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
    self.inception_3a_3x3_reduce_bn = nn.BatchNorm2d(64, affine=True)
    self.inception_3a_relu_3x3_reduce = nn.ReLU(inplace)
    self.inception_3a_3x3 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_3a_3x3_bn = nn.BatchNorm2d(64, affine=True)
    self.inception_3a_relu_3x3 = nn.ReLU(inplace)
    self.inception_3a_double_3x3_reduce = nn.Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
    self.inception_3a_double_3x3_reduce_bn = nn.BatchNorm2d(64, affine=True)
    self.inception_3a_relu_double_3x3_reduce = nn.ReLU(inplace)
    self.inception_3a_double_3x3_1 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_3a_double_3x3_1_bn = nn.BatchNorm2d(64, affine=True)
    self.inception_3a_relu_double_3x3_1 = nn.ReLU(inplace)
    self.inception_3a_double_3x3_2 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_3a_double_3x3_2_bn = nn.BatchNorm2d(64, affine=True)
    self.inception_3a_relu_double_3x3_2 = nn.ReLU(inplace)
    self.inception_3a_pool = nn.AvgPool2d(3, stride=1, padding=1, ceil_mode=True, count_include_pad=True)
    self.inception_3a_pool_proj = nn.Conv2d(128, 64, kernel_size=(1, 1), stride=(1, 1))
    self.inception_3a_pool_proj_bn = nn.BatchNorm2d(64, affine=True)
    self.inception_3a_relu_pool_proj = nn.ReLU(inplace)
    self.inception_3c_pool = nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=True)

    # Group "4a": convolutions take 256 input channels, branches emit 128.
    self.inception_4a_1x1 = nn.Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
    self.inception_4a_1x1_bn = nn.BatchNorm2d(128, affine=True)
    self.inception_4a_relu_1x1 = nn.ReLU(inplace)
    self.inception_4a_3x3_reduce = nn.Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
    self.inception_4a_3x3_reduce_bn = nn.BatchNorm2d(64, affine=True)
    self.inception_4a_relu_3x3_reduce = nn.ReLU(inplace)
    self.inception_4a_3x3 = nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_4a_3x3_bn = nn.BatchNorm2d(128, affine=True)
    self.inception_4a_relu_3x3 = nn.ReLU(inplace)
    self.inception_4a_double_3x3_reduce = nn.Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1))
    self.inception_4a_double_3x3_reduce_bn = nn.BatchNorm2d(64, affine=True)
    self.inception_4a_relu_double_3x3_reduce = nn.ReLU(inplace)
    self.inception_4a_double_3x3_1 = nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_4a_double_3x3_1_bn = nn.BatchNorm2d(128, affine=True)
    self.inception_4a_relu_double_3x3_1 = nn.ReLU(inplace)
    self.inception_4a_double_3x3_2 = nn.Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_4a_double_3x3_2_bn = nn.BatchNorm2d(128, affine=True)
    self.inception_4a_relu_double_3x3_2 = nn.ReLU(inplace)
    self.inception_4a_pool = nn.AvgPool2d(3, stride=1, padding=1, ceil_mode=True, count_include_pad=True)
    self.inception_4a_pool_proj = nn.Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1))
    self.inception_4a_pool_proj_bn = nn.BatchNorm2d(128, affine=True)
    self.inception_4a_relu_pool_proj = nn.ReLU(inplace)
    self.inception_4e_pool = nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=True)

    # Group "5a": convolutions take 512 input channels, branches emit 256.
    self.inception_5a_1x1 = nn.Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    self.inception_5a_1x1_bn = nn.BatchNorm2d(256, affine=True)
    self.inception_5a_relu_1x1 = nn.ReLU(inplace)
    self.inception_5a_3x3_reduce = nn.Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    self.inception_5a_3x3_reduce_bn = nn.BatchNorm2d(256, affine=True)
    self.inception_5a_relu_3x3_reduce = nn.ReLU(inplace)
    self.inception_5a_3x3 = nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_5a_3x3_bn = nn.BatchNorm2d(256, affine=True)
    self.inception_5a_relu_3x3 = nn.ReLU(inplace)
    self.inception_5a_double_3x3_reduce = nn.Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    self.inception_5a_double_3x3_reduce_bn = nn.BatchNorm2d(256, affine=True)
    self.inception_5a_relu_double_3x3_reduce = nn.ReLU(inplace)
    self.inception_5a_double_3x3_1 = nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_5a_double_3x3_1_bn = nn.BatchNorm2d(256, affine=True)
    self.inception_5a_relu_double_3x3_1 = nn.ReLU(inplace)
    self.inception_5a_double_3x3_2 = nn.Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_5a_double_3x3_2_bn = nn.BatchNorm2d(256, affine=True)
    self.inception_5a_relu_double_3x3_2 = nn.ReLU(inplace)
    self.inception_5a_pool = nn.AvgPool2d(3, stride=1, padding=1, ceil_mode=True, count_include_pad=True)
    self.inception_5a_pool_proj = nn.Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    self.inception_5a_pool_proj_bn = nn.BatchNorm2d(256, affine=True)
    self.inception_5a_relu_pool_proj = nn.ReLU(inplace)
    self.inception_5c_pool = nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=True)

    # Group "6a": convolutions take 1024 input channels, branches emit 512.
    self.inception_6a_1x1 = nn.Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1))
    self.inception_6a_1x1_bn = nn.BatchNorm2d(512, affine=True)
    self.inception_6a_relu_1x1 = nn.ReLU(inplace)
    self.inception_6a_3x3_reduce = nn.Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1))
    self.inception_6a_3x3_reduce_bn = nn.BatchNorm2d(512, affine=True)
    self.inception_6a_relu_3x3_reduce = nn.ReLU(inplace)
    self.inception_6a_3x3 = nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_6a_3x3_bn = nn.BatchNorm2d(512, affine=True)
    self.inception_6a_relu_3x3 = nn.ReLU(inplace)
    self.inception_6a_double_3x3_reduce = nn.Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1))
    self.inception_6a_double_3x3_reduce_bn = nn.BatchNorm2d(512, affine=True)
    self.inception_6a_relu_double_3x3_reduce = nn.ReLU(inplace)
    self.inception_6a_double_3x3_1 = nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_6a_double_3x3_1_bn = nn.BatchNorm2d(512, affine=True)
    self.inception_6a_relu_double_3x3_1 = nn.ReLU(inplace)
    self.inception_6a_double_3x3_2 = nn.Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    self.inception_6a_double_3x3_2_bn = nn.BatchNorm2d(512, affine=True)
    self.inception_6a_relu_double_3x3_2 = nn.ReLU(inplace)
    self.inception_6a_pool = nn.AvgPool2d(3, stride=1, padding=1, ceil_mode=True, count_include_pad=True)
    self.inception_6a_pool_proj = nn.Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1))
    self.inception_6a_pool_proj_bn = nn.BatchNorm2d(512, affine=True)
    self.inception_6a_relu_pool_proj = nn.ReLU(inplace)
    self.inception_6e_pool = nn.MaxPool2d((3, 3), stride=(2, 2), dilation=(1, 1), ceil_mode=True)

    # Initialise every Conv2d / BatchNorm2d / GroupNorm registered above;
    # ``self.modules()`` covers both the RGB and the depth layers.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)

    # Zero-initialize the last BN in each residual branch,
    # so that the residual branch starts with zeros, and each residual block behaves like an identity.
    # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
    if zero_init_residual:
        for m in self.modules():
            if isinstance(m, Bottleneck):
                nn.init.constant_(m.bn3.weight, 0)
            elif isinstance(m, BasicBlock):
                nn.init.constant_(m.bn2.weight, 0)
def __init__(self, sz=None):
    """Create an adaptive average pool and an adaptive max pool of equal size.

    Args:
        sz: Output size handed to both pooling layers. Any falsy value,
            including the default ``None``, is replaced by ``(1, 1)``.
    """
    super().__init__()
    if not sz:
        sz = (1, 1)
    self.ap = nn.AdaptiveAvgPool2d(sz)
    self.mp = nn.AdaptiveMaxPool2d(sz)
def __init__(self, in_channels=None, out_channels=None, **kwargs):
    """Initialise the parent class and add a 1x1 global average pool.

    Args:
        in_channels: Forwarded positionally to the parent constructor.
        out_channels: Forwarded positionally to the parent constructor.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    super().__init__(in_channels, out_channels, **kwargs)
    pooled_size = (1, 1)
    self.global_pool = nn.AdaptiveAvgPool2d(pooled_size)
def __init__(self, **kwargs):
    """Build the MobileNetV2 feature extractor, classifier and pooling.

    Keyword Args:
        input_channels (int): Channels of the input image. Default 3.
        num_classes (int): Classifier output size. Default 1000.
        widen_factor (float): Multiplier for channel widths. Default 1.0.
        mode (str): One of ``"ori"``, ``"lv"``, ``"tiny"`` (case
            insensitive). It indexes the module-level setting tables.
            Default ``"ori"``.
        activation (str): ``"relu"`` or ``"prelu"`` (case insensitive).
            Default ``"relu"``.

    Any other keyword argument is ignored.
    """
    super(MobileNetV2, self).__init__()

    image_channels = kwargs.pop("input_channels", 3)
    num_classes = kwargs.pop("num_classes", 1000)
    widen_factor = kwargs.pop("widen_factor", 1.0)
    mode = kwargs.pop("mode", "ori").lower()
    assert mode in {"ori", "lv", "tiny"}
    # TODO: add new activation
    activation = kwargs.pop("activation", "relu").lower()
    assert activation in {"relu", "prelu"}

    stage_settings = INTERVERTED_RESIDUAL_SETTING[mode]
    base_width = INPUT_CHANNELS[mode]
    base_last_channel = LAST_CHANNEL[mode]
    block = InvertedResidual

    # building first layer
    # assert input_size % 32 == 0
    width = int(base_width * widen_factor)
    # The final width is only scaled up, never down.
    if widen_factor > 1.0:
        self.last_channel = int(base_last_channel * widen_factor)
    else:
        self.last_channel = base_last_channel

    layers = [conv_bn(image_channels, width, 2, activation=activation)]

    # building inverted residual blocks
    for t, c, n, s in stage_settings:
        stage_width = int(c * widen_factor)
        for repeat in range(n):
            # Only the first block of a stage uses the stage's stride.
            stride = s if repeat == 0 else 1
            layers.append(
                block(width,
                      stage_width,
                      stride,
                      expand_ratio=t,
                      activation=activation))
            width = stage_width

    # building last several layers
    layers.append(
        conv_1x1_bn(width, self.last_channel, activation=activation))

    # make it nn.Sequential
    self.features = nn.Sequential(*layers)

    # building classifier
    # self.classifier = nn.Sequential(
    #     nn.Dropout(0.2),
    #     nn.Linear(self.last_channel, num_classes),
    # )
    self.classifier = nn.Linear(self.last_channel, num_classes)
    self.avgpool = nn.AdaptiveAvgPool2d(1)

    self._initialize_weights()
def __init__(self, rgb=3):
    """Create the HrCNN layers and apply their custom initialisation.

    Args:
        rgb: Number of input channels, used for the input batch-norm and
            the first convolution.

    Layers are created and initialised in a fixed order so the random
    number stream is consumed deterministically. ``conv_20`` and
    ``bn_20`` keep PyTorch's default initialisation.
    """
    super(HrCNN, self).__init__()

    self.rgb = rgb
    self.ada_avg_pool2d = nn.AdaptiveAvgPool2d(output_size=(192, 128))

    bn_init_mean = 0
    bn_init_std = .1
    xavier_gain = 1
    wide_kernel = (15, 10)

    def make_bn(channels):
        """BatchNorm2d whose weight is drawn from N(mean, std)."""
        layer = nn.BatchNorm2d(channels)
        nn.init.normal_(layer.weight, bn_init_mean, bn_init_std)
        return layer

    def make_conv(channels_in, channels_out, kernel):
        """Unpadded stride-1 Conv2d with Xavier-normal weights."""
        layer = nn.Conv2d(channels_in,
                          channels_out,
                          kernel_size=kernel,
                          stride=1,
                          padding=0)
        nn.init.xavier_normal_(layer.weight, gain=xavier_gain)
        return layer

    self.bn_input = make_bn(rgb)

    # Stage 00: rgb -> 64 channels, pooling stride 2.
    self.conv_00 = make_conv(rgb, 64, wide_kernel)
    self.max_pool2d_00 = nn.MaxPool2d(kernel_size=wide_kernel,
                                      stride=(2, 2))
    self.bn_00 = make_bn(64)

    # Stage 01: 64 -> 64 channels.
    self.conv_01 = make_conv(64, 64, wide_kernel)
    self.max_pool2d_01 = nn.MaxPool2d(kernel_size=wide_kernel,
                                      stride=(1, 1))
    self.bn_01 = make_bn(64)

    # Stage 10: 64 -> 128 channels.
    self.conv_10 = make_conv(64, 128, wide_kernel)
    self.max_pool2d_10 = nn.MaxPool2d(kernel_size=wide_kernel,
                                      stride=(1, 1))
    self.bn_10 = make_bn(128)

    # Stage 20: GCBlock plus a (12, 10) convolution, default init.
    self.gcb = GCBlock(128)
    self.conv_20 = nn.Conv2d(128,
                             128,
                             kernel_size=(12, 10),
                             stride=1,
                             padding=0)
    self.max_pool2d_20 = nn.MaxPool2d(kernel_size=wide_kernel,
                                      stride=(1, 1))
    self.bn_20 = nn.BatchNorm2d(128)

    # 1x1 convolution down to a single output channel.
    self.conv_last = make_conv(128, 1, 1)

    self.gradients = None
def __init__(self, output_blocks=[DEFAULT_BLOCK_INDEX], resize_input=True, normalize_input=True, requires_grad=False, use_fid_inception=True):
    """Wrap a pretrained InceptionV3 as a staged feature extractor.

    Only the stages up to the deepest requested block are built.

    Parameters
    ----------
    output_blocks : list of int
        Indices of the blocks whose features are returned:
            - 0: output of the first max pooling
            - 1: output of the second max pooling
            - 2: output that is fed to the aux classifier
            - 3: output of the final average pooling
    resize_input : bool
        If true, the input is bilinearly resized to 299x299 before it is
        fed to the model. Without its fully connected layers the network
        is fully convolutional, so resizing may not be strictly needed.
    normalize_input : bool
        If true, the input is rescaled from range (0, 1) to the range
        the pretrained Inception network expects, namely (-1, 1).
    requires_grad : bool
        Value written to ``requires_grad`` of every parameter; enable it
        for fine-tuning.
    use_fid_inception : bool
        If true, use the pretrained Inception model from Tensorflow's FID
        implementation, otherwise the one shipped with torchvision. The
        FID model has different weights and a slightly different
        structure; use it when computing FID scores so results are
        comparable.
    """
    super(InceptionV3, self).__init__()

    self.resize_input = resize_input
    self.normalize_input = normalize_input
    self.output_blocks = sorted(output_blocks)
    self.last_needed_block = max(output_blocks)

    assert self.last_needed_block <= 3, \
        'Last possible output block index is 3'

    self.blocks = nn.ModuleList()

    if use_fid_inception:
        backbone = fid_inception_v3()
    else:
        backbone = _inception_v3(pretrained=True)

    deepest = self.last_needed_block

    # Block 0: input to maxpool1 (always present)
    self.blocks.append(
        nn.Sequential(
            backbone.Conv2d_1a_3x3,
            backbone.Conv2d_2a_3x3,
            backbone.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2),
        ))

    # Block 1: maxpool1 to maxpool2
    if deepest >= 1:
        self.blocks.append(
            nn.Sequential(
                backbone.Conv2d_3b_1x1,
                backbone.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2),
            ))

    # Block 2: maxpool2 to aux classifier
    if deepest >= 2:
        self.blocks.append(
            nn.Sequential(
                backbone.Mixed_5b,
                backbone.Mixed_5c,
                backbone.Mixed_5d,
                backbone.Mixed_6a,
                backbone.Mixed_6b,
                backbone.Mixed_6c,
                backbone.Mixed_6d,
                backbone.Mixed_6e,
            ))

    # Block 3: aux classifier to final avgpool
    if deepest >= 3:
        self.blocks.append(
            nn.Sequential(
                backbone.Mixed_7a,
                backbone.Mixed_7b,
                backbone.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1)),
            ))

    for weight in self.parameters():
        weight.requires_grad = requires_grad
def __init__(self):
    """Create ten conv/BN/ReLU/dropout units, pooling and a 1x1 head.

    Channel widths go 3 -> 32 -> 64 -> 128, with a 2x2 max-pool after
    units 3, 6 and 8. The head is a global average pool and a bias-free
    1x1 convolution to 10 channels.
    """
    # Set before the parent constructor, as in the original ordering.
    self.dropout_value = 0.15
    super(QuizDNN, self).__init__()

    def conv_unit(channels_in, channels_out, ksize, padding=0):
        """Bias-free square Conv2d -> BatchNorm2d -> ReLU -> Dropout."""
        return nn.Sequential(
            nn.Conv2d(channels_in,
                      channels_out,
                      kernel_size=(ksize, ksize),
                      padding=padding,
                      bias=False),
            nn.BatchNorm2d(channels_out),
            nn.ReLU(),
            nn.Dropout(self.dropout_value),
        )

    # 32-channel group
    self.convblock1 = conv_unit(3, 32, 1)
    self.convblock2 = conv_unit(32, 32, 3, padding=1)
    self.convblock3 = conv_unit(32, 32, 3, padding=1)
    self.pool1 = nn.MaxPool2d(2, 2)

    # 64-channel group
    self.convblock4 = conv_unit(32, 64, 1)
    self.convblock5 = conv_unit(64, 64, 3, padding=1)
    self.convblock6 = conv_unit(64, 64, 3, padding=1)
    self.pool2 = nn.MaxPool2d(2, 2)

    # 128-channel group
    self.convblock7 = conv_unit(64, 128, 1)
    self.convblock8 = conv_unit(128, 128, 3, padding=1)
    self.MP3 = nn.MaxPool2d(2, 2)
    self.convblock9 = conv_unit(128, 128, 3, padding=1)
    self.convblock10 = conv_unit(128, 128, 3, padding=1)

    # Head: global average pool, then 1x1 conv to 10 output channels.
    self.gap = nn.AdaptiveAvgPool2d(output_size=(1, 1))
    self.convblock11 = nn.Sequential(
        nn.Conv2d(128, 10, kernel_size=(1, 1), bias=False))
def __init__(self, in_channel, out_channel):
    """Create two sets of 15/11/7/3 convolutions and a global-pool branch.

    Args:
        in_channel: Input channels of every convolution created here.
        out_channel: Output channels of every convolution and the width
            of the batch-norm layer.
    """
    super(FPA, self).__init__()

    def same_size_conv(kernel):
        """Bias-free stride-1 conv padded by ``kernel // 2``."""
        return nn.Conv2d(in_channel,
                         out_channel,
                         kernel_size=kernel,
                         stride=1,
                         padding=kernel // 2,
                         bias=False)

    # First set, largest kernel first.
    self.c15_1 = same_size_conv(15)
    self.c11_1 = same_size_conv(11)
    self.c7_1 = same_size_conv(7)
    self.c3_1 = same_size_conv(3)

    # Second set with the same kernel sizes.
    self.c15_2 = same_size_conv(15)
    self.c11_2 = same_size_conv(11)
    self.c7_2 = same_size_conv(7)
    self.c3_2 = same_size_conv(3)

    # Global pooling followed by a 1x1 convolution.
    self.avg_pool = nn.AdaptiveAvgPool2d(1)
    self.c1_gpb = nn.Conv2d(in_channel,
                            out_channel,
                            kernel_size=1,
                            bias=False)

    self.bn = nn.BatchNorm2d(out_channel)
    self.relu = nn.ReLU(inplace=True)
def _make_stage(self, features, size):
    """Return one pooling stage: adaptive average pool, then a 1x1 conv.

    Args:
        features: Channel count; the convolution keeps it unchanged.
        size: Side length of the square pooled output.

    Returns:
        ``nn.Sequential`` holding the pool and the bias-free convolution.
    """
    return nn.Sequential(
        nn.AdaptiveAvgPool2d(output_size=(size, size)),
        nn.Conv2d(features, features, kernel_size=1, bias=False),
    )
def __init__(self, block_units, width_factor, head_size=21843, zero_head=False):
    """Build the root, four bottleneck blocks and the classification head.

    Args:
        block_units: Four unit counts, one per block.
        width_factor: Multiplier applied to every channel width.
        head_size: Output channels of the head's 1x1 convolution.
        zero_head: Stored as ``self.zero_head``; not otherwise used here.
    """
    super().__init__()
    wf = width_factor  # shortcut 'cause we'll use it a lot.

    def build_block(cin, cout, cmid, n_units, **first_unit_kwargs):
        """Named sequence: one widening unit, then ``cout -> cout`` units.

        Extra keyword arguments (e.g. ``stride``) go to the first unit
        only.
        """
        units = [(
            "unit01",
            PreActBottleneck(cin=cin, cout=cout, cmid=cmid,
                             **first_unit_kwargs),
        )]
        for i in range(2, n_units + 1):
            units.append((
                f"unit{i:02d}",
                PreActBottleneck(cin=cout, cout=cout, cmid=cmid),
            ))
        return nn.Sequential(OrderedDict(units))

    root_layers = OrderedDict()
    root_layers["conv"] = StdConv2d(3,
                                    64 * wf,
                                    kernel_size=7,
                                    stride=2,
                                    padding=3,
                                    bias=False)
    root_layers["pad"] = nn.ConstantPad2d(1, 0)
    root_layers["pool"] = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
    # The following is subtly not the same!
    # ('pool', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
    self.root = nn.Sequential(root_layers)

    # Blocks are created in order 1..4; every block after the first
    # downsamples in its first unit.
    body_layers = OrderedDict()
    body_layers["block1"] = build_block(64 * wf, 256 * wf, 64 * wf,
                                        block_units[0])
    body_layers["block2"] = build_block(256 * wf, 512 * wf, 128 * wf,
                                        block_units[1], stride=2)
    body_layers["block3"] = build_block(512 * wf, 1024 * wf, 256 * wf,
                                        block_units[2], stride=2)
    body_layers["block4"] = build_block(1024 * wf, 2048 * wf, 512 * wf,
                                        block_units[3], stride=2)
    self.body = nn.Sequential(body_layers)

    self.zero_head = zero_head

    head_layers = OrderedDict()
    head_layers["gn"] = nn.GroupNorm(32, 2048 * wf)
    head_layers["relu"] = nn.ReLU(inplace=True)
    head_layers["avg"] = nn.AdaptiveAvgPool2d(output_size=1)
    head_layers["conv"] = nn.Conv2d(2048 * wf,
                                    head_size,
                                    kernel_size=1,
                                    bias=True)
    self.head = nn.Sequential(head_layers)
def __init__(self, out_planes, is_training, criterion, ohem_criterion, pretrained_model=None, norm_layer=nn.BatchNorm2d):
    """Assemble the BiSeNet context path, spatial path, fusion and heads.

    Args:
        out_planes: Output channel count handed to every ``BiSeNetHead``.
        is_training: When true, two auxiliary heads are created and the
            loss criteria are stored on the module.
        criterion: Stored as ``self.criterion`` in training mode.
        ohem_criterion: Stored as ``self.ohem_criterion`` in training mode.
        pretrained_model: Forwarded to ``xception39``.
        norm_layer: Normalisation layer class used by all sub-modules.
    """
    super(BiSeNet, self).__init__()

    self.context_path = xception39(pretrained_model, norm_layer=norm_layer)

    self.business_layer = []
    self.is_training = is_training

    self.spatial_path = SpatialPath(3, 128, norm_layer)

    conv_channel = 128
    fused_channel = conv_channel * 2

    self.global_context = nn.Sequential(
        nn.AdaptiveAvgPool2d(1),
        ConvBnRelu(256, conv_channel, 1, 1, 0,
                   has_bn=True, has_relu=True, has_bias=False,
                   norm_layer=norm_layer),
    )

    # stage = [256, 128, 64]
    arms = [
        AttentionRefinement(stage_channel, conv_channel, norm_layer)
        for stage_channel in (256, 128)
    ]
    refines = [
        ConvBnRelu(conv_channel, conv_channel, 3, 1, 1,
                   has_bn=True, norm_layer=norm_layer,
                   has_relu=True, has_bias=False)
        for _ in range(2)
    ]

    # Auxiliary heads exist only in training mode; placeholders keep the
    # final head at index 2 either way.
    if is_training:
        heads = [
            BiSeNetHead(conv_channel, out_planes, 2, True, norm_layer),
            BiSeNetHead(conv_channel, out_planes, 1, True, norm_layer),
        ]
    else:
        heads = [None, None]
    heads.append(
        BiSeNetHead(fused_channel, out_planes, 1, False, norm_layer))

    self.ffm = FeatureFusion(fused_channel, fused_channel, 1, norm_layer)

    self.arms = nn.ModuleList(arms)
    self.refines = nn.ModuleList(refines)
    self.heads = nn.ModuleList(heads)

    # Everything except the context path is listed here.
    self.business_layer.extend([
        self.spatial_path,
        self.global_context,
        self.arms,
        self.refines,
        self.heads,
        self.ffm,
    ])

    if is_training:
        self.criterion = criterion
        self.ohem_criterion = ohem_criterion
def __init__(self, net_type='mixnet_m', input_size=32, num_classes=100, stem_channels=16, feature_size=1536, depth_multiplier=1.0):
    """Build a MixNet: stem conv, MixNet blocks, head conv and classifier.

    Args:
        net_type: ``'mixnet_s'``, ``'mixnet_m'`` or ``'mixnet_l'``. The
            large variant uses the ``mixnet_m`` table with
            ``depth_multiplier`` scaled by 1.3.
        input_size: Must be a multiple of 32.
        num_classes: Output size of the linear classifier.
        stem_channels: Overwritten by the value tied to ``net_type``
            (16 for ``mixnet_s``, 24 otherwise), so the argument has no
            effect.
        feature_size: Output channels of the head convolution and input
            size of the classifier.
        depth_multiplier: Factor applied, via ``_RoundChannels``, to the
            stem width and to the first two entries of every block
            configuration.

    Raises:
        TypeError: If ``net_type`` is not a supported variant.
    """
    super(MixNet, self).__init__()

    if net_type == 'mixnet_s':
        base_config = self.mixnet_s
        stem_channels = 16
        dropout_rate = 0.2
    elif net_type == 'mixnet_m':
        base_config = self.mixnet_m
        stem_channels = 24
        dropout_rate = 0.25
    elif net_type == 'mixnet_l':
        base_config = self.mixnet_m
        stem_channels = 24
        depth_multiplier *= 1.3
        dropout_rate = 0.25
    else:
        raise TypeError('Unsupported MixNet type')

    assert input_size % 32 == 0

    # Scale a copy: ``base_config`` is the shared ``self.mixnet_*``
    # table. Writing scaled entries into it would leak the scaling into
    # every later instance and compound it on each construction.
    config = list(base_config)

    # depth multiplier
    if depth_multiplier != 1.0:
        stem_channels = _RoundChannels(stem_channels * depth_multiplier)

        for i, conf in enumerate(config):
            conf_ls = list(conf)
            conf_ls[0] = _RoundChannels(conf_ls[0] * depth_multiplier)
            conf_ls[1] = _RoundChannels(conf_ls[1] * depth_multiplier)
            config[i] = tuple(conf_ls)

    # stem convolution
    self.stem_conv = Conv3x3Bn(3, stem_channels, 1)

    # building MixNet blocks
    layers = []
    for (in_channels, out_channels, kernel_size, expand_ksize,
         project_ksize, stride, expand_ratio, non_linear,
         se_ratio) in config:
        layers.append(
            MixNetBlock(in_channels,
                        out_channels,
                        kernel_size=kernel_size,
                        expand_ksize=expand_ksize,
                        project_ksize=project_ksize,
                        stride=stride,
                        expand_ratio=expand_ratio,
                        non_linear=non_linear,
                        se_ratio=se_ratio))
    self.layers = nn.Sequential(*layers)

    # last several layers
    # The head takes the output width of the final block.
    self.head_conv = Conv1x1Bn(config[-1][1], feature_size)
    self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    self.dropout = nn.Dropout(dropout_rate)
    self.classifier = nn.Linear(feature_size, num_classes)

    self._initialize_weights()
def __init__(self, c_in, c_out):
    """Create a global average pool followed by a conv/BN/ReLU unit.

    Args:
        c_in: Input channels of the convolution unit.
        c_out: Output channels of the convolution unit.
    """
    super(_ImagePool, self).__init__()
    self.pool = nn.AdaptiveAvgPool2d(output_size=1)
    # NOTE(review): the trailing positionals are presumably kernel size,
    # stride, padding and dilation -- confirm against _ConvBnReLU.
    self.conv = _ConvBnReLU(c_in, c_out, 1, 1, 0, 1)
def __init__(self, block, layers, groups, reduction, dropout_p=0.2, inplanes=128, input_3x3=True, downsample_kernel_size=3, downsample_padding=1, num_classes=1000):
    """Build the SENet stem, four residual stages and classifier head.

    Parameters
    ----------
    block (nn.Module): Bottleneck class.
        - SENet154: SEBottleneck
        - SE-ResNet models: SEResNetBottleneck
        - SE-ResNeXt models: SEResNeXtBottleneck
    layers (list of ints): Number of residual blocks in each of the four
        stages (layer1...layer4).
    groups (int): Number of groups for the 3x3 convolution in each
        bottleneck block.
        - SENet154: 64
        - SE-ResNet models: 1
        - SE-ResNeXt models: 32
    reduction (int): Reduction ratio of the Squeeze-and-Excitation
        modules.
        - All models: 16
    dropout_p (float or None): Drop probability of the Dropout layer;
        with `None` no Dropout layer is created.
        - SENet154: 0.2
        - SE-ResNet models: None
        - SE-ResNeXt models: None
    inplanes (int): Number of input channels for layer1.
        - SENet154: 128
        - SE-ResNet models: 64
        - SE-ResNeXt models: 64
    input_3x3 (bool): If `True`, layer0 uses three 3x3 convolutions
        instead of a single 7x7 convolution.
        - SENet154: True
        - SE-ResNet models: False
        - SE-ResNeXt models: False
    downsample_kernel_size (int): Kernel size of the downsampling
        convolutions in layer2, layer3 and layer4.
        - SENet154: 3
        - SE-ResNet models: 1
        - SE-ResNeXt models: 1
    downsample_padding (int): Padding of the downsampling convolutions
        in layer2, layer3 and layer4.
        - SENet154: 1
        - SE-ResNet models: 0
        - SE-ResNeXt models: 0
    num_classes (int): Number of outputs of the `last_linear` layer.
        - All models: 1000
    """
    super(SENet, self).__init__()
    self.inplanes = inplanes

    if input_3x3:
        stem = [
            ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)),
            ('bn1', nn.BatchNorm2d(64)),
            ('relu1', nn.ReLU(inplace=True)),
            ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1,
                                bias=False)),
            ('bn2', nn.BatchNorm2d(64)),
            ('relu2', nn.ReLU(inplace=True)),
            ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
                                bias=False)),
            ('bn3', nn.BatchNorm2d(inplanes)),
            ('relu3', nn.ReLU(inplace=True)),
        ]
    else:
        stem = [
            ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                padding=3, bias=False)),
            ('bn1', nn.BatchNorm2d(inplanes)),
            ('relu1', nn.ReLU(inplace=True)),
        ]
    # To preserve compatibility with Caffe weights `ceil_mode=True`
    # is used instead of `padding=1`.
    stem.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)))
    self.layer0 = nn.Sequential(OrderedDict(stem))

    # layer1 keeps resolution and always uses a 1x1 unpadded downsample.
    self.layer1 = self._make_layer(block,
                                   planes=64,
                                   blocks=layers[0],
                                   groups=groups,
                                   reduction=reduction,
                                   downsample_kernel_size=1,
                                   downsample_padding=0)

    # layer2..layer4 halve the resolution and share these settings.
    strided_stage_kwargs = dict(
        stride=2,
        groups=groups,
        reduction=reduction,
        downsample_kernel_size=downsample_kernel_size,
        downsample_padding=downsample_padding,
    )
    self.layer2 = self._make_layer(block, planes=128, blocks=layers[1],
                                   **strided_stage_kwargs)
    self.layer3 = self._make_layer(block, planes=256, blocks=layers[2],
                                   **strided_stage_kwargs)
    self.layer4 = self._make_layer(block, planes=512, blocks=layers[3],
                                   **strided_stage_kwargs)

    # self.avg_pool = nn.AvgPool2d(7, stride=1)
    self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
    if dropout_p is not None:
        self.dropout = nn.Dropout(dropout_p)
    else:
        self.dropout = None
    self.last_linear = nn.Linear(512 * block.expansion, num_classes)