Exemple #1
0
    if CHECKPOINT_NAME != None:
        checkpoint = torch.load(CHECKPOINT_PATH + CHECKPOINT_NAME)
        train_loader, val_loader, test_loader, label_encoder, num_classes = load_data(
            checkpoint)
    else:
        train_loader, val_loader, test_loader, label_encoder, num_classes = load_data(
        )

    if RESNET_SIZE == 50:
        model = torchvision.models.resnet50(pretrained=True)
    elif RESNET_SIZE == 101:
        model = torchvision.models.resnet101(pretrained=True)
    else:
        raise ValueError("Invalid resnet size: ", RESNET_SIZE)
    model.avg_pool = nn.AdaptiveAvgPool2d(1)
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    model.cuda()

    if CHECKPOINT_NAME != None:
        model.load_state_dict(checkpoint["model_state_dict"])
        epoch = int(checkpoint["epoch"]) + 1
        global_step = int(checkpoint["global_step"])

    global_start_time = time.time()

    if not PREDICT_ONLY:
        print("Training...")
        criterion = nn.CrossEntropyLoss()

        optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
Exemple #2
0
    def __init__(self,
                 block,
                 layers,
                 num_classes=1000,
                 zero_init_residual=False,
                 groups=1,
                 width_per_group=64,
                 replace_stride_with_dilation=None,
                 norm_layer=None):
        """Assemble the stem, the four residual stages and the classifier head.

        Args:
            block: Residual block class handed to ``_make_layer``; its
                ``expansion`` attribute sizes the final linear layer.
            layers: Indexable whose entries 0-3 are passed to
                ``_make_layer`` for stages 1-4.
            num_classes: Output width of the final linear layer.
            zero_init_residual: When true, the weight of ``bn3`` in every
                ``Bottleneck`` and of ``bn2`` in every ``BasicBlock`` is
                set to zero after the default initialisation.
            groups: Stored as ``self.groups``.
            width_per_group: Stored as ``self.base_width``.
            replace_stride_with_dilation: ``None`` (treated as three
                ``False`` flags) or a 3-element sequence whose entries are
                forwarded as ``dilate`` to stages 2, 3 and 4.
            norm_layer: Normalisation layer factory; ``nn.BatchNorm2d``
                when ``None``.

        Raises:
            ValueError: If ``replace_stride_with_dilation`` does not have
                exactly three elements.
        """
        super(ResNet, self).__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1

        # One flag per strided stage (2-4): replace the stride with a
        # dilated convolution instead.
        dilation_flags = replace_stride_with_dilation
        if dilation_flags is None:
            dilation_flags = [False, False, False]
        if len(dilation_flags) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(
                                 dilation_flags))

        self.groups = groups
        self.base_width = width_per_group

        # Stem.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; stages 2-4 share stride 2 and differ only in
        # width, depth and dilation flag.
        self.layer1 = self._make_layer(block, 64, layers[0])
        for stage_idx, planes in enumerate((128, 256, 512), start=1):
            stage = self._make_layer(block,
                                     planes,
                                     layers[stage_idx],
                                     stride=2,
                                     dilate=dilation_flags[stage_idx - 1])
            setattr(self, "layer{}".format(stage_idx + 1), stage)

        # Head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.flatten = Flatten()
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
                continue
            if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        if not zero_init_residual:
            return

        # Zero the last norm layer of each residual branch so every block
        # starts out as an identity mapping
        # (0.2~0.3% gain according to https://arxiv.org/abs/1706.02677).
        for module in self.modules():
            if isinstance(module, Bottleneck):
                nn.init.constant_(module.bn3.weight, 0)
            elif isinstance(module, BasicBlock):
                nn.init.constant_(module.bn2.weight, 0)
    def __init__(self,
                 layers,
                 block=None,
                 k=1,
                 use_relu_=False,
                 use_bn_=True,
                 init='kaiming_normal',
                 bn_fc_mode=1,
                 split_output=False,
                 split_size=512,
                 descriptor_size=512,
                 pretrained=None):
        """Build the stem, four residual stages, SE block and pooling layer.

        Args:
            layers: Indexable whose entries 0-3 are passed to
                ``_make_layer`` for stages 1-4.
            block: Residual block class; ``BasicBlock`` when ``None``.
            k: Width multiplier; channel counts are ``round(base * k)``.
            use_relu_: Written to the module-level global ``use_relu``.
                When true the stem activation is ``nn.ReLU``, otherwise a
                per-channel ``nn.PReLU``.
            use_bn_: Written to the module-level global ``use_bn`` and to
                ``self.use_bn``. Controls whether ``bn1`` is created and
                whether ``conv1`` has a bias (bias only without BN).
            init: Accepted but not used in this constructor.
            bn_fc_mode: Stored as ``self.bn_fc_mode``.
            split_output: Stored as ``self.split_output``.
            split_size: Accepted but not used in this constructor.
            descriptor_size: Accepted but not used in this constructor.
            pretrained: When truthy, passed to
                ``self._load_pretrained_weight``; otherwise all
                ``Conv2d``/``BatchNorm2d`` modules get default
                initialisation.
        """
        # nn.Module must be initialised before attributes are assigned to
        # the instance, so this call comes first.
        super(ResNetCaffe, self).__init__()

        # These module-level flags are part of the original behaviour; they
        # are rebound on every construction.
        global use_relu, use_bn
        use_relu = use_relu_
        use_bn = use_bn_

        self.use_bn = use_bn
        self.split_output = split_output
        self.bn_fc_mode = bn_fc_mode

        stem_channels = round(32 * k)
        self.inplanes = stem_channels
        self.conv1 = nn.Conv2d(3,
                               stem_channels,
                               kernel_size=3,
                               stride=1,
                               padding=0,
                               bias=not use_bn)

        # NOTE: in the non-pretrained branch below, conv1 is re-initialised
        # by the Kaiming loop, which overwrites this uniform initialisation.
        scale = calculate_scale(self.conv1.weight.data)
        torch.nn.init.uniform_(self.conv1.weight.data, -scale, scale)
        if self.conv1.bias is not None:
            self.conv1.bias.data.zero_()
        if self.use_bn:
            self.bn1 = nn.BatchNorm2d(stem_channels)
        if use_relu:
            self.relu = nn.ReLU(inplace=True)
        else:
            self.relu = nn.PReLU(stem_channels)

        block = block if block is not None else BasicBlock
        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, round(64 * k), layers[0])
        self.layer2 = self._make_layer(block,
                                       round(128 * k),
                                       layers[1],
                                       stride=2)
        self.layer3 = self._make_layer(block,
                                       round(256 * k),
                                       layers[2],
                                       stride=2)
        self.layer4 = self._make_layer(block,
                                       round(512 * k),
                                       layers[3],
                                       stride=2)

        # The SE block width is fixed at 256 and does not follow ``k``.
        self.se_block = SEBlock(256, 256 // 16)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        #self.fc = nn.Linear(512, descriptor_size)

        if pretrained:
            self._load_pretrained_weight(pretrained)
        else:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight,
                                            mode='fan_out',
                                            nonlinearity='relu')
                elif isinstance(m, nn.BatchNorm2d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)
Exemple #4
0
    def __init__(
        self,
        num_classes,
        loss,
        block,
        layers,
        zero_init_residual=False,
        groups=1,
        width_per_group=64,
        replace_stride_with_dilation=None,
        norm_layer=None,
        last_stride=2,
        fc_dims=None,
        dropout_p=None,
        **kwargs
    ):
        """Build the backbone, the optional FC embedding and the classifier.

        Args:
            num_classes: Output width of ``self.classifier``.
            loss: Stored as ``self.loss``.
            block: Residual block class handed to ``_make_layer``; its
                ``expansion`` attribute sets the initial ``feature_dim``.
            layers: Indexable whose entries 0-3 are passed to
                ``_make_layer`` for stages 1-4.
            zero_init_residual: When true, the weight of ``bn3`` in every
                ``Bottleneck`` and of ``bn2`` in every ``BasicBlock`` is
                set to zero after ``_init_params``.
            groups: Stored as ``self.groups``.
            width_per_group: Stored as ``self.base_width``.
            replace_stride_with_dilation: ``None`` (treated as three
                ``False`` flags) or a 3-element sequence forwarded as
                ``dilate`` to stages 2, 3 and 4.
            norm_layer: Normalisation layer factory; ``nn.BatchNorm2d``
                when ``None``.
            last_stride: Stride of stage 4.
            fc_dims: Forwarded to ``_construct_fc_layer``.
            dropout_p: Forwarded to ``_construct_fc_layer``.
            **kwargs: Accepted and ignored by this constructor.

        Raises:
            ValueError: If ``replace_stride_with_dilation`` does not have
                exactly three elements.
        """
        super(ResNet, self).__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        self._norm_layer = norm_layer
        self.loss = loss
        self.feature_dim = 512 * block.expansion
        self.inplanes = 64
        self.dilation = 1

        self.acm_drop_rate = 0.75    # default is 0.75
        self.acm_threshold = 0.8

        # One flag per strided stage (2-4): replace the stride with a
        # dilated convolution instead.
        dilation_flags = replace_stride_with_dilation
        if dilation_flags is None:
            dilation_flags = [False, False, False]
        if len(dilation_flags) != 3:
            raise ValueError(
                "replace_stride_with_dilation should be None "
                "or a 3-element tuple, got {}".format(dilation_flags)
            )

        self.groups = groups
        self.base_width = width_per_group

        # Stem.
        self.conv1 = nn.Conv2d(
            3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False
        )
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; only stage 4 uses the configurable stride.
        self.layer1 = self._make_layer(block, 64, layers[0])
        later_stages = ((128, 2), (256, 2), (512, last_stride))
        for stage_idx, (planes, stride) in enumerate(later_stages, start=1):
            stage = self._make_layer(
                block,
                planes,
                layers[stage_idx],
                stride=stride,
                dilate=dilation_flags[stage_idx - 1]
            )
            setattr(self, "layer{}".format(stage_idx + 1), stage)

        # Head. The FC layer is built before the classifier because the
        # classifier reads ``self.feature_dim`` afterwards.
        self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = self._construct_fc_layer(
            fc_dims, 512 * block.expansion, dropout_p
        )
        self.classifier = nn.Linear(self.feature_dim, num_classes)

        self._init_params()

        if not zero_init_residual:
            return

        # Zero the last norm layer of each residual branch so every block
        # starts out as an identity mapping
        # (0.2~0.3% gain according to https://arxiv.org/abs/1706.02677).
        for module in self.modules():
            if isinstance(module, Bottleneck):
                nn.init.constant_(module.bn3.weight, 0)
            elif isinstance(module, BasicBlock):
                nn.init.constant_(module.bn2.weight, 0)
    def __init__(self,
                 widen_factor=1.0,
                 num_classes=1000,
                 prelu=False,
                 input_channel=3):
        """Build the stem, the depth-wise stages and the classifier.

        Args:
            widen_factor: Multiplier applied to every base channel width.
            num_classes: Output width of the final linear layer.
            prelu: When truthy the stem uses ``nn.PReLU`` instead of
                ``nn.ReLU``; the flag is also forwarded to every
                ``DepthWiseBlock``.
            input_channel: Number of channels of the input to ``conv1``.
        """
        super(MobileNet, self).__init__()

        block = DepthWiseBlock
        stem_width = int(32 * widen_factor)
        self.conv1 = nn.Conv2d(input_channel,
                               stem_width,
                               kernel_size=3,
                               stride=2,
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(stem_width)
        self.relu = nn.PReLU() if prelu else nn.ReLU(inplace=True)

        # (attribute name, base input width, base output width, stride-2?)
        # Listed in registration order.
        stage_plan = (
            ("dw2_1", 32, 64, False),
            ("dw2_2", 64, 128, True),
            ("dw3_1", 128, 128, False),
            ("dw3_2", 128, 256, True),
            ("dw4_1", 256, 256, False),
            ("dw4_2", 256, 512, True),
            ("dw5_1", 512, 512, False),
            ("dw5_2", 512, 512, False),
            ("dw5_3", 512, 512, False),
            ("dw5_4", 512, 512, False),
            ("dw5_5", 512, 512, False),
            ("dw5_6", 512, 1024, True),
            ("dw6", 1024, 1024, False),
        )
        for attr_name, base_in, base_out, downsample in stage_plan:
            width_in = base_in * widen_factor
            width_out = base_out * widen_factor
            # Stride is only passed where the stage downsamples; elsewhere
            # the block's own default is used.
            if downsample:
                stage = block(width_in, width_out, stride=2, prelu=prelu)
            else:
                stage = block(width_in, width_out, prelu=prelu)
            setattr(self, attr_name, stage)

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(int(1024 * widen_factor), num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
 def __init__(self, n_channels):
     """Create the pooled branch (``self.block``) and the 3x3 ``self.conv``.

     Args:
         n_channels: Channel count used as both input and output width of
             every layer built here.
     """
     super().__init__()
     # Pool to 1x1, normalise, then apply ConvBnReLU.
     pooled_branch = [
         nn.AdaptiveAvgPool2d(1),
         nn.BatchNorm2d(n_channels),
         ConvBnReLU(n_channels, n_channels, 1, 1),
     ]
     self.block = nn.Sequential(*pooled_branch)
     self.conv = nn.Conv2d(n_channels,
                           n_channels,
                           kernel_size=3,
                           stride=1,
                           padding=1)
    def __init__(self, transformer, basis_equiv_layers, fc_sizes, shape_input,
                 sz_output, bias, pool_sz_conv, normalize_basis, stride_conv,
                 lr, normalized_l2, onebyoneconv, basis_equiv_layers_type,
                 pool_type, last_layer_type):
        """Build the layer stack of the network into ``self.layers``.

        Layers are appended in this order: the feature layers described by
        ``basis_equiv_layers``, optional 1x1 convolutions, an optional
        ``'conv1x1'``/``'group1x1'`` output layer, a global pooling layer,
        optional fully connected layers and an optional ``'linear'`` output
        layer.

        Args:
            transformer: Forwarded to ``BasisAE`` and ``BasisEquivConvLyer``.
            basis_equiv_layers: Iterable of
                ``(nr_basis, nr_filters, filter_sz)`` triples, one per
                feature layer.
            fc_sizes: Output widths of the fully connected layers.
            shape_input: Length-3 input shape; only its first entry is read
                here (used as the input channel count) — presumably
                ``(C, H, W)``, TODO confirm against the caller.
            sz_output: Output width of the last layer.
            bias: Whether the feature layers and the ``'group1x1'`` layer
                use a bias.
            pool_sz_conv: Stored as ``self.pool_sz_conv`` and
                ``self.pool_sz``.
            normalize_basis: Forwarded to ``BasisAE`` as ``normalize``.
            stride_conv: Per-layer stride values. Used as the layer stride
                when ``pool_type == 'stride'``; a value of 2 adds a 2x2
                pooling layer when ``pool_type`` is ``'avg'`` or ``'max'``.
            lr: Forwarded to ``BasisAE``.
            normalized_l2: Forwarded to ``BasisAE``.
            onebyoneconv: Output widths of the 1x1 convolutions.
            basis_equiv_layers_type: ``'conv'`` selects plain ``nn.Conv2d``
                feature layers; any other value selects
                ``BasisEquivConvLyer`` and is forwarded to ``BasisAE`` as
                ``basis_type``.
            pool_type: ``'avg'``, ``'max'`` or ``'stride'``.
            last_layer_type: ``'conv1x1'``, ``'group1x1'`` or ``'linear'``;
                any other value adds no output layer.

        Raises:
            ValueError: If ``shape_input`` does not have three entries.
        """
        super(BasisEquivariantNet, self).__init__()
        self.last_layer_type = last_layer_type
        self.basis_equiv_layers_type = basis_equiv_layers_type
        self.pool_type = pool_type
        self.normalize_basis = normalize_basis
        self.pool_sz_conv = pool_sz_conv
        self.basis_equiv_layers = basis_equiv_layers
        self.len_non1_basis_equiv_layers = len(
            [layer for layer in basis_equiv_layers if layer[2] != 1])
        self.sz_output = sz_output
        self.layers = nn.ModuleList()
        self.pool_sz = pool_sz_conv
        self.stride_conv = stride_conv

        plain_conv = basis_equiv_layers_type == 'conv'

        if len(shape_input) != 3:
            raise ValueError("are we not sending images?")
        # FIX: this used to read ``type != 'conv'``, which compares the
        # builtin ``type`` with a string and is therefore always true. The
        # layer-type argument is what was evidently meant; for 'conv'
        # networks ``shape_input`` is now left as given.
        if not plain_conv:
            # image has 1 transformation S
            # we add tuples (K, S) and (n, n) => (K, S, n, n)
            shape_input = (shape_input[0], 1)
        self.shape_input = shape_input

        # Feature layers: layer, optional 2x2 pooling, batch norm, ReLU.
        for idx, (nr_basis, nr_filters,
                  filter_sz) in enumerate(basis_equiv_layers):
            stride = stride_conv[idx] if pool_type == 'stride' else 1

            if not plain_conv:
                basis_ae_layer = None
                # 1x1 filters get no basis auto-encoder.
                if filter_sz != 1:
                    basis_ae_layer = BasisAE(
                        in_shape=shape_input,
                        nr_basis=nr_basis,
                        transformer=transformer,
                        basis_sz=filter_sz,
                        padding=int(filter_sz / 2),
                        normalize=normalize_basis,
                        lr=lr,
                        index=idx,
                        normalized_l2=normalized_l2,
                        basis_type=basis_equiv_layers_type)
                layer = BasisEquivConvLyer(
                    basis_ae=basis_ae_layer,
                    transformer=transformer,
                    in_shape=shape_input,
                    nr_basis=nr_basis,
                    nr_filters=nr_filters,
                    stride=stride,
                    filter_sz=filter_sz,
                    conv_padding=int(filter_sz / 2),
                    bias=bias,
                    index=idx)
                shape_input = layer.out_shape
                norm_cls = nn.BatchNorm3d
            else:
                assert nr_basis is None
                assert filter_sz == 3
                layer = nn.Conv2d(shape_input[0],
                                  nr_filters,
                                  filter_sz,
                                  stride=stride,
                                  padding=1,
                                  bias=bias)
                shape_input = (nr_filters, )
                norm_cls = nn.BatchNorm2d

            self.layers.append(layer)
            if stride_conv[idx] == 2:
                if pool_type == 'avg':
                    self.layers.append(nn.AvgPool2d((2, 2), 2))
                elif pool_type == 'max':
                    self.layers.append(nn.MaxPool2d((2, 2), 2))
            self.layers.append(norm_cls(nr_filters))
            self.layers.append(nn.ReLU())

        # Optional 1x1 convolutions.
        has_onebyone = len(onebyoneconv) != 0
        if has_onebyone:
            if not plain_conv:
                self.layers.append(nn.AdaptiveAvgPool3d((1, None, None)))
            for sz in onebyoneconv:
                self.layers.append(
                    nn.Conv1d(in_channels=shape_input[0],
                              out_channels=sz,
                              kernel_size=1))
                shape_input = (sz, )
                self.layers.append(nn.BatchNorm2d(sz))
                self.layers.append(nn.ReLU())

        # Output layers that come before the global pooling.
        if last_layer_type == 'conv1x1':
            self.layers.append(
                nn.Conv1d(in_channels=shape_input[0],
                          out_channels=sz_output,
                          kernel_size=1))
        elif last_layer_type == 'group1x1':
            self.layers.append(
                BasisEquivConvLyer(
                    basis_ae=None,
                    transformer=transformer,
                    in_shape=shape_input,
                    nr_basis=0,
                    nr_filters=sz_output,
                    stride=1,
                    filter_sz=1,
                    conv_padding=0,
                    bias=bias,
                    index=len(basis_equiv_layers)))

        # Global pooling: the 3-D variant is used only for non-'conv'
        # networks without 1x1 convolutions.
        pool_3d = not plain_conv and not has_onebyone
        if pool_type in ('avg', 'stride'):
            if pool_3d:
                self.layers.append(nn.AdaptiveAvgPool3d((1, 1, 1)))
            else:
                self.layers.append(nn.AdaptiveAvgPool2d((1, 1)))
        elif pool_type == 'max':
            if pool_3d:
                self.layers.append(nn.AdaptiveMaxPool3d((1, 1, 1)))
            else:
                self.layers.append(nn.AdaptiveMaxPool2d((1, 1)))

        # Fully connected layers and the optional linear output layer.
        for sz in fc_sizes:
            self.layers.append(nn.Linear(shape_input[0], sz))
            self.layers.append(nn.BatchNorm1d(sz))
            self.layers.append(nn.ReLU())
            shape_input = (sz, )
        if last_layer_type == 'linear':
            self.layers.append(nn.Linear(shape_input[0], sz_output))
Exemple #8
0
    def forward(self, x, s_x=None, s_y=None, y=None):
        """Segment the query image ``x`` guided by the support set.

        Args:
            x: Query batch, indexed as a 4-D tensor; dims 2 and 3 are the
                spatial dims and ``size - 1`` of each must be divisible by 8.
            s_x: Support images, indexed as ``s_x[:, i, :, :, :]`` for each
                of the ``self.shot`` shots. When omitted, a zero tensor of
                shape ``(1, 1, 3, 473, 473)`` on ``x``'s device is used.
            s_y: Support masks, indexed as ``s_y[:, i, :, :]``; pixels
                equal to 1 are foreground. When omitted, a zero tensor of
                shape ``(1, 1, 473, 473)`` on ``x``'s device is used.
            y: Query target; read only when ``self.training`` is true.

        Returns:
            In training mode the tuple
            ``(out.max(1)[1], main_loss, aux_loss)``; otherwise the logits
            ``out``.
        """
        # The placeholders used to be built as default-argument values,
        # i.e. uninitialised CUDA tensors allocated when the class was
        # defined. They are now created lazily, zero-filled, on x's device.
        if s_x is None:
            s_x = torch.zeros(1, 1, 3, 473, 473,
                              dtype=torch.float32, device=x.device)
        if s_y is None:
            s_y = torch.zeros(1, 1, 473, 473,
                              dtype=torch.float32, device=x.device)

        x_size = x.size()
        assert (x_size[2] - 1) % 8 == 0 and (x_size[3] - 1) % 8 == 0
        h = int((x_size[2] - 1) / 8 * self.zoom_factor + 1)
        w = int((x_size[3] - 1) / 8 * self.zoom_factor + 1)

        # Generate the query feature (backbone runs without gradients).
        with torch.no_grad():
            query_feat_0 = self.layer0(x)
            query_feat_1 = self.layer1(query_feat_0)
            query_feat_2 = self.layer2(query_feat_1)
            query_feat_3 = self.layer3(query_feat_2)
            query_feat_4 = self.layer4(query_feat_3)
            if self.vgg:
                query_feat_2 = F.interpolate(
                    query_feat_2,
                    size=(query_feat_3.size(2), query_feat_3.size(3)),
                    mode='bilinear', align_corners=True)

        query_feat = torch.cat([query_feat_3, query_feat_2], 1)
        query_feat = self.down_query(query_feat)

        # Generate the support features, one per shot.
        supp_feat_list = []
        final_supp_list = []
        mask_list = []
        for i in range(self.shot):
            mask = (s_y[:, i, :, :] == 1).float().unsqueeze(1)
            # mask_list keeps the full-resolution mask; ``mask`` itself is
            # resized below and reused for the weighted pooling.
            mask_list.append(mask)
            with torch.no_grad():
                supp_feat_0 = self.layer0(s_x[:, i, :, :, :])
                supp_feat_1 = self.layer1(supp_feat_0)
                supp_feat_2 = self.layer2(supp_feat_1)
                supp_feat_3 = self.layer3(supp_feat_2)
                mask = F.interpolate(
                    mask,
                    size=(supp_feat_3.size(2), supp_feat_3.size(3)),
                    mode='bilinear', align_corners=True)
                supp_feat_4 = self.layer4(supp_feat_3 * mask)
                final_supp_list.append(supp_feat_4)
                if self.vgg:
                    supp_feat_2 = F.interpolate(
                        supp_feat_2,
                        size=(supp_feat_3.size(2), supp_feat_3.size(3)),
                        mode='bilinear', align_corners=True)

            supp_feat = torch.cat([supp_feat_3, supp_feat_2], 1)
            supp_feat = self.down_supp(supp_feat)
            supp_feat = Weighted_GAP(supp_feat, mask)
            supp_feat_list.append(supp_feat)

        # Correlation mask: for each query position, the highest cosine
        # similarity to any masked support position, min-max normalised per
        # sample and averaged over the shots.
        corr_query_mask_list = []
        cosine_eps = 1e-7
        for i, tmp_supp_feat in enumerate(final_supp_list):
            resize_size = tmp_supp_feat.size(2)
            tmp_mask = F.interpolate(mask_list[i],
                                     size=(resize_size, resize_size),
                                     mode='bilinear', align_corners=True)
            tmp_supp_feat_4 = tmp_supp_feat * tmp_mask

            bsize, ch_sz, sp_sz, _ = query_feat_4.size()

            tmp_query = query_feat_4.contiguous().view(bsize, ch_sz, -1)
            tmp_query_norm = torch.norm(tmp_query, 2, 1, True)

            tmp_supp = tmp_supp_feat_4.contiguous().view(bsize, ch_sz, -1)
            tmp_supp = tmp_supp.contiguous().permute(0, 2, 1)
            tmp_supp_norm = torch.norm(tmp_supp, 2, 2, True)

            similarity = torch.bmm(tmp_supp, tmp_query) / (
                torch.bmm(tmp_supp_norm, tmp_query_norm) + cosine_eps)
            similarity = similarity.max(1)[0].view(bsize, sp_sz * sp_sz)
            sim_min = similarity.min(1)[0].unsqueeze(1)
            sim_max = similarity.max(1)[0].unsqueeze(1)
            similarity = (similarity - sim_min) / (
                sim_max - sim_min + cosine_eps)
            corr_query = similarity.view(bsize, 1, sp_sz, sp_sz)
            corr_query = F.interpolate(
                corr_query,
                size=(query_feat_3.size()[2], query_feat_3.size()[3]),
                mode='bilinear', align_corners=True)
            corr_query_mask_list.append(corr_query)
        corr_query_mask = torch.cat(corr_query_mask_list, 1).mean(1).unsqueeze(1)
        corr_query_mask = F.interpolate(
            corr_query_mask,
            size=(query_feat.size(2), query_feat.size(3)),
            mode='bilinear', align_corners=True)

        # Average the support prototypes over the shots.
        if self.shot > 1:
            supp_feat = supp_feat_list[0]
            for extra_feat in supp_feat_list[1:]:
                supp_feat += extra_feat
            supp_feat /= len(supp_feat_list)

        out_list = []
        pyramid_feat_list = []

        for idx, tmp_bin in enumerate(self.pyramid_bins):
            # Entries <= 1.0 are ratios of the feature height; larger
            # entries are absolute output sizes.
            if tmp_bin <= 1.0:
                bin_size = int(query_feat.shape[2] * tmp_bin)
            else:
                bin_size = tmp_bin
            # Pool functionally for both kinds of entry. Indexing
            # ``self.avgpool_list`` by ``idx`` was misaligned whenever
            # ratio and absolute bins were mixed; the pooling result is
            # the same.
            query_feat_bin = F.adaptive_avg_pool2d(query_feat, bin_size)
            supp_feat_bin = supp_feat.expand(-1, -1, bin_size, bin_size)
            corr_mask_bin = F.interpolate(corr_query_mask,
                                          size=(bin_size, bin_size),
                                          mode='bilinear', align_corners=True)
            merge_feat_bin = torch.cat(
                [query_feat_bin, supp_feat_bin, corr_mask_bin], 1)
            merge_feat_bin = self.init_merge[idx](merge_feat_bin)

            if idx >= 1:
                # Refine with the previous pyramid level.
                pre_feat_bin = pyramid_feat_list[idx - 1].clone()
                pre_feat_bin = F.interpolate(pre_feat_bin,
                                             size=(bin_size, bin_size),
                                             mode='bilinear',
                                             align_corners=True)
                rec_feat_bin = torch.cat([merge_feat_bin, pre_feat_bin], 1)
                merge_feat_bin = self.alpha_conv[idx - 1](rec_feat_bin) + merge_feat_bin

            merge_feat_bin = self.beta_conv[idx](merge_feat_bin) + merge_feat_bin
            inner_out_bin = self.inner_cls[idx](merge_feat_bin)
            merge_feat_bin = F.interpolate(
                merge_feat_bin,
                size=(query_feat.size(2), query_feat.size(3)),
                mode='bilinear', align_corners=True)
            pyramid_feat_list.append(merge_feat_bin)
            out_list.append(inner_out_bin)

        query_feat = torch.cat(pyramid_feat_list, 1)
        query_feat = self.res1(query_feat)
        query_feat = self.res2(query_feat) + query_feat
        out = self.cls(query_feat)

        # Output part.
        if self.zoom_factor != 1:
            out = F.interpolate(out, size=(h, w), mode='bilinear',
                                align_corners=True)

        if not self.training:
            return out

        target = y.long()
        main_loss = self.criterion(out, target)
        # zeros_like already matches main_loss's device; the former extra
        # ``.cuda()`` call could move it to a different device.
        aux_loss = torch.zeros_like(main_loss)
        for inner_out in out_list:
            inner_out = F.interpolate(inner_out, size=(h, w),
                                      mode='bilinear', align_corners=True)
            aux_loss = aux_loss + self.criterion(inner_out, target)
        aux_loss = aux_loss / len(out_list)
        return out.max(1)[1], main_loss, aux_loss
Exemple #9
0
    def __init__(self, layers=50, classes=2, zoom_factor=8,
                 criterion=nn.CrossEntropyLoss(ignore_index=255),
                 BatchNorm=nn.BatchNorm2d, pretrained=True, sync_bn=True,
                 shot=1, ppm_scales=[60, 30, 15, 8], vgg=False):
        """Build the backbone and the pyramid merging / classification heads.

        Args:
            layers: ResNet depth; must be 50, 101 or 152.
            classes: Number of output classes; must be greater than 1.
            zoom_factor: Stored as ``self.zoom_factor``.
            criterion: Loss module stored as ``self.criterion``. The default
                instance is created once, when the function is defined, and
                is shared by every model built with the default.
            BatchNorm: Accepted for compatibility; the value is overridden
                with ``nn.BatchNorm2d`` below (original behaviour).
            pretrained: Forwarded to the backbone constructor.
            sync_bn: Accepted but not used in this constructor.
            shot: Stored as ``self.shot``.
            ppm_scales: Pyramid bin specification. Entries greater than 1
                get an ``nn.AdaptiveAvgPool2d`` in ``self.avgpool_list``.
            vgg: When true a VGG16-BN backbone is used instead of ResNet.
        """
        super(PFENet, self).__init__()
        assert layers in [50, 101, 152]
        print(ppm_scales)
        assert classes > 1
        # The original code re-imported BatchNorm2d under this name, which
        # shadows the argument; that behaviour is kept.
        BatchNorm = nn.BatchNorm2d
        # Copy so the instance never aliases the shared default list.
        ppm_scales = list(ppm_scales)

        self.zoom_factor = zoom_factor
        self.criterion = criterion
        self.shot = shot
        self.ppm_scales = ppm_scales
        self.vgg = vgg

        models.BatchNorm = BatchNorm

        if self.vgg:
            print('INFO: Using VGG_16 bn')
            vgg_models.BatchNorm = BatchNorm
            vgg16 = vgg_models.vgg16_bn(pretrained=pretrained)
            print(vgg16)
            self.layer0, self.layer1, self.layer2, \
                self.layer3, self.layer4 = get_vgg16_layer(vgg16)
        else:
            print('INFO: Using ResNet {}'.format(layers))
            if layers == 50:
                resnet = models.resnet50(pretrained=pretrained)
            elif layers == 101:
                resnet = models.resnet101(pretrained=pretrained)
            else:
                resnet = models.resnet152(pretrained=pretrained)
            self.layer0 = nn.Sequential(
                resnet.conv1, resnet.bn1, resnet.relu1,
                resnet.conv2, resnet.bn2, resnet.relu2,
                resnet.conv3, resnet.bn3, resnet.relu3,
                resnet.maxpool)
            self.layer1, self.layer2, self.layer3, self.layer4 = \
                resnet.layer1, resnet.layer2, resnet.layer3, resnet.layer4

            # Replace the stride of the last two stages with dilation
            # (rate 2 for layer3, rate 4 for layer4).
            for stage, rate in ((self.layer3, 2), (self.layer4, 4)):
                for name, module in stage.named_modules():
                    if 'conv2' in name:
                        module.dilation = (rate, rate)
                        module.padding = (rate, rate)
                        module.stride = (1, 1)
                    elif 'downsample.0' in name:
                        module.stride = (1, 1)

        reduce_dim = 256
        # Channel count of the concatenated layer3 + layer2 features.
        fea_dim = 512 + 256 if self.vgg else 1024 + 512

        self.cls = nn.Sequential(
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.1),
            nn.Conv2d(reduce_dim, classes, kernel_size=1)
        )

        self.down_query = nn.Sequential(
            nn.Conv2d(fea_dim, reduce_dim, kernel_size=1, padding=0, bias=False),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5)
        )
        self.down_supp = nn.Sequential(
            nn.Conv2d(fea_dim, reduce_dim, kernel_size=1, padding=0, bias=False),
            nn.ReLU(inplace=True),
            nn.Dropout2d(p=0.5)
        )

        self.pyramid_bins = ppm_scales
        # One entry per pyramid bin so the list can be indexed with the bin
        # index. Bins <= 1 have no fixed-size pool and get ``None``.
        # Previously such bins were skipped, which shifted the indices of
        # the remaining pools.
        self.avgpool_list = [
            nn.AdaptiveAvgPool2d(bin_size) if bin_size > 1 else None
            for bin_size in self.pyramid_bins
        ]

        mask_add_num = 1
        # Built in the original interleaved order so that seeded weight
        # initialisation is unchanged.
        init_merge = []
        beta_conv = []
        inner_cls = []
        for _ in self.pyramid_bins:
            init_merge.append(nn.Sequential(
                nn.Conv2d(reduce_dim*2 + mask_add_num, reduce_dim, kernel_size=1, padding=0, bias=False),
                nn.ReLU(inplace=True),
            ))
            beta_conv.append(nn.Sequential(
                nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
                nn.ReLU(inplace=True),
                nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
                nn.ReLU(inplace=True)
            ))
            inner_cls.append(nn.Sequential(
                nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
                nn.ReLU(inplace=True),
                nn.Dropout2d(p=0.1),
                nn.Conv2d(reduce_dim, classes, kernel_size=1)
            ))
        self.init_merge = nn.ModuleList(init_merge)
        self.beta_conv = nn.ModuleList(beta_conv)
        self.inner_cls = nn.ModuleList(inner_cls)

        self.res1 = nn.Sequential(
            nn.Conv2d(reduce_dim*len(self.pyramid_bins), reduce_dim, kernel_size=1, padding=0, bias=False),
            nn.ReLU(inplace=True),
        )
        self.res2 = nn.Sequential(
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.Conv2d(reduce_dim, reduce_dim, kernel_size=3, padding=1, bias=False),
            nn.ReLU(inplace=True),
        )

        self.GAP = nn.AdaptiveAvgPool2d(1)

        # One merge conv between each pair of adjacent pyramid levels;
        # 2 * reduce_dim in, reduce_dim out (512 -> 256).
        alpha_conv = []
        for _ in range(len(self.pyramid_bins) - 1):
            alpha_conv.append(nn.Sequential(
                nn.Conv2d(reduce_dim * 2, reduce_dim, kernel_size=1, stride=1, padding=0, bias=False),
                nn.ReLU()
            ))
        self.alpha_conv = nn.ModuleList(alpha_conv)
Exemple #10
0
    def __init__(self,
                 block=Bottleneck,
                 layers=(3, 5, 11, 7),
                 class_num=4,
                 label_num=5,
                 dropout=0.7,
                 mode='multi',
                 vocab_size=0):
        """Create the fast/slow 3D-conv pathways, text/audio branches and heads.

        Args:
            block: residual block class forwarded unchanged to
                ``_make_layer_fast`` / ``_make_layer_slow``.
            layers: four block counts, one per residual stage (res2..res5).
                Only indexed here, never mutated, hence an immutable default.
            class_num: output width of every ``classifier1`` head.
            label_num: number of parallel ``classifier1`` heads.
            dropout: probability used by ``self.dp``.
            mode: stored on ``self.mode``; not otherwise read in this method.
            vocab_size: number of rows of the ``EmbeddingBag`` text table.
        """
        super(SlowFast, self).__init__()
        self.mode = mode
        self.embed_dim = 64
        self.audio_size = 1024
        self.class_num = class_num
        self.label_num = label_num

        # ---- fast pathway -------------------------------------------------
        # fast_inplanes must be set before the _make_layer_fast calls and is
        # read again when the LSTM is built below; presumably the helper
        # updates it as stages are added -- verify against _make_layer_fast.
        self.fast_inplanes = 8
        self.fast_conv1 = nn.Conv3d(3,
                                    8,
                                    kernel_size=(5, 7, 7),
                                    stride=(1, 2, 2),
                                    padding=(2, 3, 3),
                                    bias=False)
        self.fast_bn1 = nn.BatchNorm3d(8)
        self.fast_relu = nn.ReLU(inplace=True)
        self.fast_maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                         stride=(1, 2, 2),
                                         padding=(0, 1, 1))
        self.fast_res2 = self._make_layer_fast(block,
                                               8,
                                               layers[0],
                                               head_conv=5)
        self.fast_res3 = self._make_layer_fast(block,
                                               16,
                                               layers[1],
                                               stride=2,
                                               head_conv=5)
        self.fast_res4 = self._make_layer_fast(block,
                                               32,
                                               layers[2],
                                               stride=2,
                                               head_conv=3)
        self.fast_res5 = self._make_layer_fast(block,
                                               64,
                                               layers[3],
                                               stride=2,
                                               head_conv=3)

        # ---- lateral convolutions -----------------------------------------
        def lateral(channels):
            # Temporal-only conv (5x1x1 kernel, temporal stride 8) that
            # doubles the channel count.
            return nn.Conv3d(channels,
                             channels * 2,
                             kernel_size=(5, 1, 1),
                             stride=(8, 1, 1),
                             bias=False,
                             padding=(2, 0, 0))

        self.lateral_p1 = lateral(8)
        self.lateral_res2 = lateral(32)
        self.lateral_res3 = lateral(64)
        self.lateral_res4 = lateral(128)

        # ---- slow pathway -------------------------------------------------
        # 64 stem channels plus 64 // 8 * 2 = 16 extra channels (80 total);
        # must be set before the _make_layer_slow calls.
        self.slow_inplanes = 64 + 64 // 8 * 2
        self.slow_conv1 = nn.Conv3d(3,
                                    64,
                                    kernel_size=(1, 7, 7),
                                    stride=(1, 2, 2),
                                    padding=(0, 3, 3),
                                    bias=False)
        self.slow_bn1 = nn.BatchNorm3d(64)
        self.slow_relu = nn.ReLU(inplace=True)
        self.slow_maxpool = nn.MaxPool3d(kernel_size=(1, 3, 3),
                                         stride=(1, 2, 2),
                                         padding=(0, 1, 1))
        self.slow_res2 = self._make_layer_slow(block,
                                               64,
                                               layers[0],
                                               head_conv=3)
        self.slow_res3 = self._make_layer_slow(block,
                                               128,
                                               layers[1],
                                               stride=2,
                                               head_conv=3)
        self.slow_res4 = self._make_layer_slow(block,
                                               256,
                                               layers[2],
                                               stride=2,
                                               head_conv=3)
        self.slow_res5 = self._make_layer_slow(block,
                                               512,
                                               layers[3],
                                               stride=2,
                                               head_conv=3)

        self.dp = nn.Dropout(dropout)

        # Input width shared by every linear head below.
        head_in = 128 * 32 + self.embed_dim + self.audio_size

        # One independent linear classifier per label.
        self.classifier1 = nn.ModuleList(
            [nn.Linear(head_in, class_num) for _ in range(label_num)])

        # ---- text branch --------------------------------------------------
        self.embedding = nn.EmbeddingBag(vocab_size, self.embed_dim)
        self.text_init_weights()

        # ---- genre / age heads --------------------------------------------
        self.genrefc = nn.Linear(head_in, 9)
        self.agefc = nn.Linear(head_in, 4)

        # ---- audio branch -------------------------------------------------
        # Single-channel 2D conv stack ending in a 2x2 adaptive average pool.
        self.extract_audio = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(3, 5), stride=(1, 2),
                      padding=(1, 0)),
            nn.BatchNorm2d(32),
            nn.LeakyReLU(),
            nn.MaxPool2d((1, 2)),
            nn.Conv2d(32, 64, kernel_size=(3, 5), stride=(1, 2),
                      padding=(1, 1)),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(),
            nn.MaxPool2d((1, 2)),
            nn.Conv2d(64, 128, kernel_size=(3, 5), stride=(1, 2),
                      padding=(1, 1)),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(),
            nn.MaxPool2d((1, 2)),
            nn.Conv2d(128, 256, kernel_size=(3, 5), stride=(1, 2),
                      padding=(0, 1)),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=(0, 1)),
            nn.AdaptiveAvgPool2d(2),
        )

        # Two-layer bidirectional LSTM; input width uses the current value of
        # fast_inplanes, so this must stay after the fast pathway is built.
        self.lstm = nn.LSTM(self.fast_inplanes + 2048,
                            hidden_size=128,
                            num_layers=2,
                            batch_first=False,
                            bidirectional=True,
                            dropout=0.2)
    def __init__(self,
                 n_classes,
                 use_instance_seg,
                 use_coords,
                 pixel_embedding_dim=16):
        """Assemble the CNN + ReNet encoder, the upsampling path and the heads.

        Args:
            n_classes: number of output channels of ``sem_seg_output``.
            use_instance_seg: when truthy, ``ins_seg_output`` is also created.
            use_coords: forwarded to ``BaseCNN``.
            pixel_embedding_dim: number of output channels of
                ``ins_seg_output``.
        """
        super(Architecture, self).__init__()

        self.n_classes = n_classes
        self.use_instance_seg = use_instance_seg
        self.use_coords = use_coords

        renet_units = 100           # n_units of each ReNet layer
        renet_out = renet_units * 2  # channel count the layers after a ReNet expect

        # Encoder: base CNN followed by two stacked ReNet layers.
        self.cnn = BaseCNN(use_coords=use_coords)
        self.renet1 = ReNet(n_input=256, n_units=renet_units)
        self.renet2 = ReNet(n_input=renet_out, n_units=renet_units)

        def upsample_x2(in_ch):
            # 2x2 transposed conv with stride 2, projecting to renet_units.
            return nn.ConvTranspose2d(in_channels=in_ch,
                                      out_channels=renet_units,
                                      kernel_size=(2, 2),
                                      stride=(2, 2))

        def head_1x1(in_ch, out_ch):
            # Per-pixel 1x1 projection used by the segmentation outputs.
            return nn.Conv2d(in_channels=in_ch,
                             out_channels=out_ch,
                             kernel_size=(1, 1),
                             stride=(1, 1))

        # Decoder; the second stage's input width adds cnn.n_filters[1].
        self.upsampling1 = upsample_x2(renet_out)
        self.relu1 = nn.ReLU()
        self.upsampling2 = upsample_x2(renet_units + self.cnn.n_filters[1])
        self.relu2 = nn.ReLU()

        # Segmentation heads; input width adds cnn.n_filters[0].
        self.sem_seg_output = head_1x1(renet_units + self.cnn.n_filters[0],
                                       self.n_classes)
        if self.use_instance_seg:
            self.ins_seg_output = head_1x1(
                renet_units + self.cnn.n_filters[0], pixel_embedding_dim)

        def conv3x3(in_ch):
            # Size-preserving 3x3 conv with 64 output channels.
            return nn.Conv2d(in_channels=in_ch,
                             out_channels=64,
                             kernel_size=(3, 3),
                             stride=(1, 1),
                             padding=(1, 1))

        # CNN that reduces its input to a (b, 64, 1, 1) tensor; always built,
        # independent of use_instance_seg.
        self.ins_cls_cnn = nn.Sequential()
        for layer_name, layer in (
                ("pool1", nn.MaxPool2d(2, stride=2)),
                ("conv1", conv3x3(renet_out)),
                ("relu1", nn.ReLU()),
                ("conv2", conv3x3(64)),
                ("relu2", nn.ReLU()),
                ("pool2", nn.MaxPool2d(kernel_size=2, stride=2)),
                ("conv3", conv3x3(64)),
                ("relu3", nn.ReLU()),
                ("conv4", conv3x3(64)),
                ("relu4", nn.ReLU()),
                ("pool3", nn.AdaptiveAvgPool2d((1, 1))),  # b, nf, 1, 1
        ):
            self.ins_cls_cnn.add_module(layer_name, layer)

        # Scalar output squashed to (0, 1).
        self.ins_cls_out = nn.Sequential()
        for layer_name, layer in (
                ("linear", nn.Linear(64, 1)),
                ("sigmoid", nn.Sigmoid()),
        ):
            self.ins_cls_out.add_module(layer_name, layer)
Exemple #12
0
    def __init__(self, dropout_rate):
        """Create every layer of the network.

        Four conv blocks separated by three transition blocks, then global
        average pooling and a 10-way linear layer. Shape notes below assume a
        32x32 RGB input, as the original annotations did.

        Args:
            dropout_rate: probability passed to every ``nn.Dropout`` layer.
        """
        super(Net, self).__init__()

        def conv_unit(conv):
            # conv -> ReLU -> BatchNorm -> Dropout, matching the layer order
            # (and therefore the Sequential indices) used throughout.
            return [
                conv,
                nn.ReLU(),
                nn.BatchNorm2d(conv.out_channels),
                nn.Dropout(dropout_rate),
            ]

        def transition():
            # 2x2 max-pool halves the spatial size, then a 1x1 conv squeezes
            # 64 channels back to 32.
            return nn.Sequential(nn.MaxPool2d(2, 2), nn.Conv2d(64, 32, 1))

        # 32x32x3 -> 32x32x32 -> 32x32x64 (3x3 convs with padding=1 keep size)
        self.convblock1 = nn.Sequential(
            *conv_unit(nn.Conv2d(3, 32, 3, padding=1)),
            *conv_unit(nn.Conv2d(32, 64, 3, padding=1)),
        )

        # 32x32x64 -> 16x16x32
        self.transblock1 = transition()

        # 16x16x32 -> 16x16x32 -> 16x16x64
        self.convblock2 = nn.Sequential(
            *conv_unit(nn.Conv2d(32, 32, 3, padding=1)),
            *conv_unit(nn.Conv2d(32, 64, 3, padding=1)),
        )

        # 16x16x64 -> 8x8x32
        self.transblock2 = transition()

        # 8x8x32 -> 8x8x32 -> 8x8x64; the second stage is a depthwise 3x3
        # conv (groups=32) followed directly by a pointwise 1x1 conv.
        self.convblock3 = nn.Sequential(
            *conv_unit(nn.Conv2d(32, 32, 3, padding=1)),
            nn.Conv2d(32, 32, 3, groups=32, padding=1),
            *conv_unit(nn.Conv2d(32, 64, 1)),
        )

        # 8x8x64 -> 4x4x32
        self.transblock3 = transition()

        # 4x4x32 -> 4x4x32, then a dilated 3x3 conv.
        # NOTE(review): with padding=1 and dilation=2 the dilated conv drops
        # 2 pixels per spatial side (4x4 -> 2x2), whereas the original
        # annotation claimed a 4x4x64 output; padding=2 would preserve the
        # size -- confirm which was intended.
        self.convblock4 = nn.Sequential(
            *conv_unit(nn.Conv2d(32, 32, 3, padding=1)),
            *conv_unit(nn.Conv2d(32, 64, 3, padding=1, dilation=2)),
        )

        # Collapse whatever spatial size remains to 1x1x64.
        self.gap = nn.Sequential(nn.AdaptiveAvgPool2d(1))

        # 64 features -> 10 outputs.
        self.fc = nn.Sequential(nn.Linear(64, 10))
Exemple #13
0
    def __init__(
        self,
        block_params: BlockParams,
        num_classes: int = 1000,
        stem_width: int = 32,
        stem_type: Optional[Callable[..., nn.Module]] = None,
        block_type: Optional[Callable[..., nn.Module]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None,
        activation: Optional[Callable[..., nn.Module]] = None,
    ) -> None:
        """Build the stem, the staged trunk, and the pooling + linear head.

        Args:
            block_params: supplies the per-stage settings through
                ``_get_expanded_params()`` and the shared ``se_ratio``.
            num_classes: output width of ``self.fc``.
            stem_width: output width of the stem and input width of stage 1.
            stem_type: stem factory; ``SimpleStemIN`` when ``None``.
            block_type: block factory; ``ResBottleneckBlock`` when ``None``.
            norm_layer: normalisation factory; ``nn.BatchNorm2d`` when
                ``None``.
            activation: activation factory; ``nn.ReLU`` when ``None``.
        """
        super().__init__()
        _log_api_usage_once(self)

        # Fill in defaults for every factory the caller left unset.
        stem_type = SimpleStemIN if stem_type is None else stem_type
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        block_type = ResBottleneckBlock if block_type is None else block_type
        activation = nn.ReLU if activation is None else activation

        # Stem takes a 3-channel input.
        self.stem = stem_type(3, stem_width, norm_layer, activation)

        # Chain the stages: each one consumes the previous stage's width.
        stages = OrderedDict()
        in_width = stem_width
        for stage_no, stage_cfg in enumerate(
                block_params._get_expanded_params(), start=1):
            (out_width, stride, depth, group_width,
             bottleneck_multiplier) = stage_cfg
            stages[f"block{stage_no}"] = AnyStage(
                in_width,
                out_width,
                stride,
                depth,
                block_type,
                norm_layer,
                activation,
                group_width,
                bottleneck_multiplier,
                block_params.se_ratio,
                stage_index=stage_no,
            )
            in_width = out_width

        self.trunk_output = nn.Sequential(stages)

        # Head: global average pool, then a linear layer sized from the
        # width of the last stage.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(in_features=in_width, out_features=num_classes)

        # Initialise the weights last, once every layer exists.
        self._reset_parameters()
Exemple #14
0
    def _prepare_base_model(self, base_model, config={}):
        """Instantiate the backbone named by ``base_model`` and set input stats.

        Sets ``self.base_model`` plus ``self.input_size``, ``self.input_mean``
        and ``self.input_std``, and optionally patches the backbone in place
        depending on ``self.is_shift``, ``self.tin`` and ``self.non_local``.

        Args:
            base_model: a name starting with ``'resnet'`` (looked up on
                ``torchvision.models``) or exactly ``'BNInception'``. Any
                other value falls through both branches shown here.
            config: not read anywhere in the visible body.
                NOTE(review): mutable default argument; harmless while it is
                unused, but should become ``None`` if it is ever mutated.
        """
        print('=> base model: {}'.format(base_model))

        if base_model.startswith('resnet'):

            # Constructor is called with a single positional True --
            # presumably the "pretrained" flag; confirm against the installed
            # torchvision version.
            self.base_model = getattr(torchvision.models, base_model)(True)

            # The optional patches below modify self.base_model in place, in
            # this order: temporal shift, temporal interlace, non-local.
            if self.is_shift:
                print('Adding temporal shift...')
                from ops.temporal_shift import make_temporal_shift
                make_temporal_shift(self.base_model,
                                    self.num_segments,
                                    n_div=self.shift_div,
                                    place=self.shift_place,
                                    temporal_pool=self.temporal_pool,
                                    two_path=True)

            if self.tin:
                print('Adding temporal deformable conv...')
                from ops.temporal_interlace import make_temporal_interlace
                make_temporal_interlace(self.base_model,
                                        self.num_segments,
                                        shift_div=self.shift_div)

            if self.non_local:
                print('Adding non-local module...')
                from ops.non_local import make_non_local
                make_non_local(self.base_model, self.num_segments)

            # Record the name of the final layer on the backbone itself.
            self.base_model.last_layer_name = 'fc'
            self.input_size = 224
            # Per-channel RGB statistics (presumably the usual ImageNet
            # values -- confirm).
            self.input_mean = [0.485, 0.456, 0.406]
            self.input_std = [0.229, 0.224, 0.225]

            # Replace the backbone's pooling with an adaptive 1x1 average
            # pool.
            self.base_model.avgpool = nn.AdaptiveAvgPool2d(1)

            if self.modality == 'Flow':
                # Single-channel statistics: fixed mean, std averaged over
                # the three RGB stds.
                self.input_mean = [0.5]
                self.input_std = [np.mean(self.input_std)]
            elif self.modality == 'RGBDiff':
                # RGB statistics followed by 3 * new_length extra channels
                # with zero mean and twice the averaged RGB std.
                self.input_mean = [0.485, 0.456, 0.406
                                   ] + [0] * 3 * self.new_length
                self.input_std = self.input_std + [
                    np.mean(self.input_std) * 2
                ] * 3 * self.new_length

        elif base_model == 'BNInception':
            from archs.bn_inception import bninception
            self.base_model = bninception(pretrained=self.pretrain)
            # Input geometry and statistics are taken from the model object.
            self.input_size = self.base_model.input_size
            self.input_mean = self.base_model.mean
            self.input_std = self.base_model.std
            self.base_model.last_layer_name = 'fc'
            if self.modality == 'Flow':
                self.input_mean = [128]
            elif self.modality == 'RGBDiff':
                # Repeat the mean list (1 + new_length) times.
                self.input_mean = self.input_mean * (1 + self.new_length)
            if self.is_shift:
                print('Adding temporal shift...')
                # NOTE(review): is_temporal_shift receives self.shift_place,
                # not a boolean -- confirm build_temporal_ops expects that.
                self.base_model.build_temporal_ops(
                    self.num_segments,
                    is_temporal_shift=self.shift_place,
                    shift_div=self.shift_div)
def train():
    """Train a ResNet-50 attribute regressor and write a submission per epoch.

    Reads ``./data/train_data.npy``, ``./data/train_y.pkl`` and
    ``./data/test_data.npy``, holds out 20% of the training data for
    validation, and optimises an MSE loss with Adam. After every epoch the
    test set is predicted, each prediction is mapped to the class whose
    attribute vector is nearest in Euclidean distance, and both the
    submission file and the model weights are rewritten.

    Uses the module-level names ``EPOCH``, ``LR``, ``BATCH_SIZE``,
    ``out_dim``, ``train_dir`` and ``test_dir``.
    """
    model = resnet50(pretrained=False)
    model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    model.fc = nn.Linear(model.fc.in_features, out_dim)

    print(model)

    optimizer = torch.optim.Adam(model.parameters(), lr=LR)
    lossFunc = nn.MSELoss()  # the target is not one-hotted

    # Load the training and test sets.
    train_x = np.load('./data/train_data.npy')
    train_y = pd.read_pickle('./data/train_y.pkl').values
    test_x = np.load('./data/test_data.npy')
    # Split into training and validation subsets.
    train_x, val_x, train_y, val_y = train_test_split(train_x,
                                                      train_y,
                                                      test_size=0.2,
                                                      random_state=2018)
    print('划分后 train size is {}, test_size is {}'.format(
        train_x.shape, val_x.shape))

    # Convert to float tensors.
    tensor_train_x = torch.from_numpy(train_x).float()
    tensor_train_y = torch.from_numpy(train_y).float()

    tensor_val_x = torch.from_numpy(val_x).float()
    tensor_val_y = torch.from_numpy(val_y).float()

    tensor_test_x = torch.from_numpy(test_x).float()

    train_data = Data.TensorDataset(tensor_train_x, tensor_train_y)
    val_data = Data.TensorDataset(tensor_val_x, tensor_val_y)

    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=BATCH_SIZE,
                                   shuffle=True,
                                   num_workers=15)
    val_loader = Data.DataLoader(dataset=val_data,
                                 batch_size=BATCH_SIZE,
                                 shuffle=True,
                                 num_workers=15)

    # Table mapping each label (column 0) to its attribute vector.
    label_data = pd.read_csv(osp.join(train_dir, 'attributes_per_class.txt'),
                             sep='\t',
                             header=None)

    def get_label(predict, data):
        """Return, for each predicted row, the label with the nearest attributes.

        ``data`` holds label names in column 0 and attribute values in the
        remaining columns. Ties resolve to the first label in ``data``.
        """
        print(data.columns)
        label_name = data[0].values.tolist()
        data30 = np.asarray(data.iloc[:, 1:].values, dtype=float)
        print(data30.shape)
        predict_label = []
        for row in predict:
            # Euclidean distance from this prediction to every class at once;
            # argmin keeps the first minimum, like a strict "<" scan would.
            dists = np.sqrt(np.sum(np.square(data30 - row), axis=1))
            predict_label.append(label_name[int(np.argmin(dists))])
        return np.array(predict_label)

    for epoch in range(EPOCH):
        # eval() is called further down in every epoch, so training mode has
        # to be restored here or BatchNorm would stay frozen from epoch 1 on.
        model.train()
        train_loss = 0.
        print('Training...')
        for batch_x, batch_y in train_loader:
            predict = model(batch_x)
            loss = lossFunc(predict, batch_y)
            # .item() extracts the Python float from the 0-dim loss tensor.
            train_loss += loss.item()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Validation: no gradients needed, so skip building autograd graphs.
        model.eval()
        eval_loss = 0.
        print('Validation....')
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                predict = model(batch_x)
                loss = lossFunc(predict, batch_y)
                eval_loss += loss.item()

        print('Epoch: ', epoch, '|Train Loss: ', train_loss, '|Val Loss: ',
              eval_loss)
        print('-' * 10)

        # Test-set inference runs every epoch and overwrites the previous
        # submission and checkpoint.
        print('Testing....')
        model.eval()
        with torch.no_grad():
            predict = model(tensor_test_x)
        predict_label = get_label(predict.cpu().numpy(), label_data)
        print(predict_label.shape)

        # Write the submission file.
        result = pd.read_csv(osp.join(test_dir, 'image.txt'),
                             sep='\t',
                             header=None)
        result['label'] = predict_label
        result.to_csv('resnet18_submit.txt',
                      header=None,
                      index=False,
                      sep='\t')

        torch.save(model.state_dict(), './resnet18_params.pkl')
Exemple #16
0
 def __init__(self, num_in, num_class):
     """Create the global average pool and the linear classifier.

     Args:
         num_in: number of input features of ``self.fc``.
         num_class: number of output features of ``self.fc``.
     """
     super(GlobalpoolFC, self).__init__()
     # Adaptive pooling to a 1x1 output per channel, whatever the input size.
     self.pool = nn.AdaptiveAvgPool2d(1)
     self.fc = nn.Linear(in_features=num_in, out_features=num_class)
    def __init__(self,
                 block,
                 layers,
                 num_classes=1000,
                 zero_init_residual=False,
                 groups=1,
                 width_per_group=64,
                 replace_stride_with_dilation=None,
                 norm_layer=None):
        super(ResNet, self).__init__()
        inplace = True
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead

            # -----------------------------
            # modified
            replace_stride_with_dilation = [False, False, False]
            # -----------------------------

        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(
                                 replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        #layer  for RGB input
        self.conv1 = nn.Conv2d(3,
                               self.inplanes,
                               kernel_size=7,
                               stride=2,
                               padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block,
                                       128,
                                       layers[1],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block,
                                       256,
                                       layers[2],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block,
                                       512,
                                       layers[3],
                                       stride=2,
                                       dilate=replace_stride_with_dilation[2])

        self.avgpool = nn.AdaptiveAvgPool2d(1)

        #layer for merge
        self.att_rgb = SELayer(64)

        self.att_rgb_layer1 = SELayer(256)

        self.att_rgb_layer2 = SELayer(512)

        self.att_d_layer3 = SELayer(1024)
        self.att_rgb_layer3 = SELayer(1024)

        self.att_d_layer4 = SELayer(2048)
        self.att_rgb_layer4 = SELayer(2048)

        #layer for depth layer
        inplace = True
        self.conv1_7x7_s2 = nn.Conv2d(1,
                                      64,
                                      kernel_size=(7, 7),
                                      stride=(2, 2),
                                      padding=(3, 3))
        self.conv1_7x7_s2_bn = nn.BatchNorm2d(64, affine=True)
        self.conv1_relu_7x7 = nn.ReLU(inplace)

        self.pool1_3x3_s2 = nn.MaxPool2d((3, 3),
                                         stride=(2, 2),
                                         dilation=(1, 1),
                                         ceil_mode=True)
        self.conv2_3x3_reduce = nn.Conv2d(64,
                                          64,
                                          kernel_size=(1, 1),
                                          stride=(1, 1))
        self.conv2_3x3_reduce_bn = nn.BatchNorm2d(64, affine=True)
        self.conv2_relu_3x3_reduce = nn.ReLU(inplace)

        self.conv2_3x3 = nn.Conv2d(64,
                                   128,
                                   kernel_size=(3, 3),
                                   stride=(1, 1),
                                   padding=(1, 1))
        self.conv2_3x3_bn = nn.BatchNorm2d(128, affine=True)
        self.conv2_relu_3x3 = nn.ReLU(inplace)
        self.pool2_3x3_s2 = nn.MaxPool2d((3, 3),
                                         stride=(2, 2),
                                         dilation=(1, 1),
                                         ceil_mode=True)

        self.inception_3a_1x1 = nn.Conv2d(128,
                                          64,
                                          kernel_size=(1, 1),
                                          stride=(1, 1))
        self.inception_3a_1x1_bn = nn.BatchNorm2d(64, affine=True)
        self.inception_3a_relu_1x1 = nn.ReLU(inplace)

        self.inception_3a_3x3_reduce = nn.Conv2d(128,
                                                 64,
                                                 kernel_size=(1, 1),
                                                 stride=(1, 1))
        self.inception_3a_3x3_reduce_bn = nn.BatchNorm2d(64, affine=True)
        self.inception_3a_relu_3x3_reduce = nn.ReLU(inplace)
        self.inception_3a_3x3 = nn.Conv2d(64,
                                          64,
                                          kernel_size=(3, 3),
                                          stride=(1, 1),
                                          padding=(1, 1))
        self.inception_3a_3x3_bn = nn.BatchNorm2d(64, affine=True)
        self.inception_3a_relu_3x3 = nn.ReLU(inplace)

        self.inception_3a_double_3x3_reduce = nn.Conv2d(128,
                                                        64,
                                                        kernel_size=(1, 1),
                                                        stride=(1, 1))
        self.inception_3a_double_3x3_reduce_bn = nn.BatchNorm2d(64,
                                                                affine=True)
        self.inception_3a_relu_double_3x3_reduce = nn.ReLU(inplace)
        self.inception_3a_double_3x3_1 = nn.Conv2d(64,
                                                   64,
                                                   kernel_size=(3, 3),
                                                   stride=(1, 1),
                                                   padding=(1, 1))
        self.inception_3a_double_3x3_1_bn = nn.BatchNorm2d(64, affine=True)
        self.inception_3a_relu_double_3x3_1 = nn.ReLU(inplace)
        self.inception_3a_double_3x3_2 = nn.Conv2d(64,
                                                   64,
                                                   kernel_size=(3, 3),
                                                   stride=(1, 1),
                                                   padding=(1, 1))
        self.inception_3a_double_3x3_2_bn = nn.BatchNorm2d(64, affine=True)
        self.inception_3a_relu_double_3x3_2 = nn.ReLU(inplace)

        self.inception_3a_pool = nn.AvgPool2d(3,
                                              stride=1,
                                              padding=1,
                                              ceil_mode=True,
                                              count_include_pad=True)
        self.inception_3a_pool_proj = nn.Conv2d(128,
                                                64,
                                                kernel_size=(1, 1),
                                                stride=(1, 1))
        self.inception_3a_pool_proj_bn = nn.BatchNorm2d(64, affine=True)
        self.inception_3a_relu_pool_proj = nn.ReLU(inplace)
        self.inception_3c_pool = nn.MaxPool2d((3, 3),
                                              stride=(2, 2),
                                              dilation=(1, 1),
                                              ceil_mode=True)

        self.inception_4a_1x1 = nn.Conv2d(256,
                                          128,
                                          kernel_size=(1, 1),
                                          stride=(1, 1))
        self.inception_4a_1x1_bn = nn.BatchNorm2d(128, affine=True)
        self.inception_4a_relu_1x1 = nn.ReLU(inplace)

        self.inception_4a_3x3_reduce = nn.Conv2d(256,
                                                 64,
                                                 kernel_size=(1, 1),
                                                 stride=(1, 1))
        self.inception_4a_3x3_reduce_bn = nn.BatchNorm2d(64, affine=True)
        self.inception_4a_relu_3x3_reduce = nn.ReLU(inplace)
        self.inception_4a_3x3 = nn.Conv2d(64,
                                          128,
                                          kernel_size=(3, 3),
                                          stride=(1, 1),
                                          padding=(1, 1))
        self.inception_4a_3x3_bn = nn.BatchNorm2d(128, affine=True)
        self.inception_4a_relu_3x3 = nn.ReLU(inplace)

        self.inception_4a_double_3x3_reduce = nn.Conv2d(256,
                                                        64,
                                                        kernel_size=(1, 1),
                                                        stride=(1, 1))
        self.inception_4a_double_3x3_reduce_bn = nn.BatchNorm2d(64,
                                                                affine=True)
        self.inception_4a_relu_double_3x3_reduce = nn.ReLU(inplace)
        self.inception_4a_double_3x3_1 = nn.Conv2d(64,
                                                   128,
                                                   kernel_size=(3, 3),
                                                   stride=(1, 1),
                                                   padding=(1, 1))
        self.inception_4a_double_3x3_1_bn = nn.BatchNorm2d(128, affine=True)
        self.inception_4a_relu_double_3x3_1 = nn.ReLU(inplace)
        self.inception_4a_double_3x3_2 = nn.Conv2d(128,
                                                   128,
                                                   kernel_size=(3, 3),
                                                   stride=(1, 1),
                                                   padding=(1, 1))
        self.inception_4a_double_3x3_2_bn = nn.BatchNorm2d(128, affine=True)
        self.inception_4a_relu_double_3x3_2 = nn.ReLU(inplace)

        self.inception_4a_pool = nn.AvgPool2d(3,
                                              stride=1,
                                              padding=1,
                                              ceil_mode=True,
                                              count_include_pad=True)
        self.inception_4a_pool_proj = nn.Conv2d(256,
                                                128,
                                                kernel_size=(1, 1),
                                                stride=(1, 1))
        self.inception_4a_pool_proj_bn = nn.BatchNorm2d(128, affine=True)
        self.inception_4a_relu_pool_proj = nn.ReLU(inplace)
        self.inception_4e_pool = nn.MaxPool2d((3, 3),
                                              stride=(2, 2),
                                              dilation=(1, 1),
                                              ceil_mode=True)

        self.inception_5a_1x1 = nn.Conv2d(512,
                                          256,
                                          kernel_size=(1, 1),
                                          stride=(1, 1))
        self.inception_5a_1x1_bn = nn.BatchNorm2d(256, affine=True)
        self.inception_5a_relu_1x1 = nn.ReLU(inplace)

        self.inception_5a_3x3_reduce = nn.Conv2d(512,
                                                 256,
                                                 kernel_size=(1, 1),
                                                 stride=(1, 1))
        self.inception_5a_3x3_reduce_bn = nn.BatchNorm2d(256, affine=True)
        self.inception_5a_relu_3x3_reduce = nn.ReLU(inplace)
        self.inception_5a_3x3 = nn.Conv2d(256,
                                          256,
                                          kernel_size=(3, 3),
                                          stride=(1, 1),
                                          padding=(1, 1))
        self.inception_5a_3x3_bn = nn.BatchNorm2d(256, affine=True)
        self.inception_5a_relu_3x3 = nn.ReLU(inplace)

        self.inception_5a_double_3x3_reduce = nn.Conv2d(512,
                                                        256,
                                                        kernel_size=(1, 1),
                                                        stride=(1, 1))
        self.inception_5a_double_3x3_reduce_bn = nn.BatchNorm2d(256,
                                                                affine=True)
        self.inception_5a_relu_double_3x3_reduce = nn.ReLU(inplace)
        self.inception_5a_double_3x3_1 = nn.Conv2d(256,
                                                   256,
                                                   kernel_size=(3, 3),
                                                   stride=(1, 1),
                                                   padding=(1, 1))
        self.inception_5a_double_3x3_1_bn = nn.BatchNorm2d(256, affine=True)
        self.inception_5a_relu_double_3x3_1 = nn.ReLU(inplace)
        self.inception_5a_double_3x3_2 = nn.Conv2d(256,
                                                   256,
                                                   kernel_size=(3, 3),
                                                   stride=(1, 1),
                                                   padding=(1, 1))
        self.inception_5a_double_3x3_2_bn = nn.BatchNorm2d(256, affine=True)
        self.inception_5a_relu_double_3x3_2 = nn.ReLU(inplace)

        self.inception_5a_pool = nn.AvgPool2d(3,
                                              stride=1,
                                              padding=1,
                                              ceil_mode=True,
                                              count_include_pad=True)
        self.inception_5a_pool_proj = nn.Conv2d(512,
                                                256,
                                                kernel_size=(1, 1),
                                                stride=(1, 1))
        self.inception_5a_pool_proj_bn = nn.BatchNorm2d(256, affine=True)
        self.inception_5a_relu_pool_proj = nn.ReLU(inplace)
        self.inception_5c_pool = nn.MaxPool2d((3, 3),
                                              stride=(2, 2),
                                              dilation=(1, 1),
                                              ceil_mode=True)

        self.inception_6a_1x1 = nn.Conv2d(1024,
                                          512,
                                          kernel_size=(1, 1),
                                          stride=(1, 1))
        self.inception_6a_1x1_bn = nn.BatchNorm2d(512, affine=True)
        self.inception_6a_relu_1x1 = nn.ReLU(inplace)

        self.inception_6a_3x3_reduce = nn.Conv2d(1024,
                                                 512,
                                                 kernel_size=(1, 1),
                                                 stride=(1, 1))
        self.inception_6a_3x3_reduce_bn = nn.BatchNorm2d(512, affine=True)
        self.inception_6a_relu_3x3_reduce = nn.ReLU(inplace)
        self.inception_6a_3x3 = nn.Conv2d(512,
                                          512,
                                          kernel_size=(3, 3),
                                          stride=(1, 1),
                                          padding=(1, 1))
        self.inception_6a_3x3_bn = nn.BatchNorm2d(512, affine=True)
        self.inception_6a_relu_3x3 = nn.ReLU(inplace)

        self.inception_6a_double_3x3_reduce = nn.Conv2d(1024,
                                                        512,
                                                        kernel_size=(1, 1),
                                                        stride=(1, 1))
        self.inception_6a_double_3x3_reduce_bn = nn.BatchNorm2d(512,
                                                                affine=True)
        self.inception_6a_relu_double_3x3_reduce = nn.ReLU(inplace)
        self.inception_6a_double_3x3_1 = nn.Conv2d(512,
                                                   512,
                                                   kernel_size=(3, 3),
                                                   stride=(1, 1),
                                                   padding=(1, 1))
        self.inception_6a_double_3x3_1_bn = nn.BatchNorm2d(512, affine=True)
        self.inception_6a_relu_double_3x3_1 = nn.ReLU(inplace)
        self.inception_6a_double_3x3_2 = nn.Conv2d(512,
                                                   512,
                                                   kernel_size=(3, 3),
                                                   stride=(1, 1),
                                                   padding=(1, 1))
        self.inception_6a_double_3x3_2_bn = nn.BatchNorm2d(512, affine=True)
        self.inception_6a_relu_double_3x3_2 = nn.ReLU(inplace)

        self.inception_6a_pool = nn.AvgPool2d(3,
                                              stride=1,
                                              padding=1,
                                              ceil_mode=True,
                                              count_include_pad=True)
        self.inception_6a_pool_proj = nn.Conv2d(1024,
                                                512,
                                                kernel_size=(1, 1),
                                                stride=(1, 1))
        self.inception_6a_pool_proj_bn = nn.BatchNorm2d(512, affine=True)
        self.inception_6a_relu_pool_proj = nn.ReLU(inplace)
        self.inception_6e_pool = nn.MaxPool2d((3, 3),
                                              stride=(2, 2),
                                              dilation=(1, 1),
                                              ceil_mode=True)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight,
                                        mode='fan_out',
                                        nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)
Exemple #18
0
 def __init__(self, sz=None):
     """Create an adaptive average-pool and an adaptive max-pool layer.

     Args:
         sz: Output size handed to both pooling layers. Any falsy value
             (``None``, ``0``, an empty tuple) is replaced by ``(1, 1)``.
     """
     super().__init__()
     target_size = sz if sz else (1, 1)
     # Both layers are built with the same target size.
     self.ap = nn.AdaptiveAvgPool2d(target_size)
     self.mp = nn.AdaptiveMaxPool2d(target_size)
Exemple #19
0
 def __init__(self, in_channels=None, out_channels=None, **kwargs):
     """Initialise the parent class, then attach a global average pool.

     Args:
         in_channels: Forwarded unchanged as the parent initialiser's first
             positional argument.
         out_channels: Forwarded unchanged as the parent initialiser's
             second positional argument.
         **kwargs: Forwarded unchanged to the parent initialiser.
     """
     super().__init__(in_channels, out_channels, **kwargs)
     # Collapses each feature map to a single 1x1 value.
     pooled_size = (1, 1)
     self.global_pool = nn.AdaptiveAvgPool2d(pooled_size)
    def __init__(self, **kwargs):
        """Assemble the feature extractor, pooling layer and classifier.

        Keyword Args:
            input_channels: Channel count of the network input (default 3).
            num_classes: Output size of the linear classifier (default 1000).
            widen_factor: Multiplier applied to the stem width and to every
                stage width; the last channel count is only scaled when the
                factor is greater than 1.0 (default 1.0).
            mode: ``"ori"``, ``"lv"`` or ``"tiny"`` (case-insensitive); used
                as the key into the module-level configuration tables.
            activation: ``"relu"`` or ``"prelu"`` (case-insensitive);
                forwarded to every layer factory.

        Keyword arguments other than the ones listed are not consumed.
        """
        super(MobileNetV2, self).__init__()

        image_channels = kwargs.pop("input_channels", 3)
        num_classes = kwargs.pop("num_classes", 1000)
        widen_factor = kwargs.pop("widen_factor", 1.0)

        mode = kwargs.pop("mode", "ori").lower()
        assert mode in {"ori", "lv", "tiny"}

        # TODO: add new activation
        activation = kwargs.pop("activation", "relu").lower()
        assert activation in {"relu", "prelu"}

        # Per-mode configuration comes from module-level lookup tables.
        stage_settings = INTERVERTED_RESIDUAL_SETTING[mode]
        stem_width = INPUT_CHANNELS[mode]
        base_last_channel = LAST_CHANNEL[mode]

        make_block = InvertedResidual

        # The final width is never shrunk: it is scaled only for factors > 1.
        width = int(stem_width * widen_factor)
        if widen_factor > 1.0:
            self.last_channel = int(base_last_channel * widen_factor)
        else:
            self.last_channel = base_last_channel

        # Stem: a stride-2 conv + BN on the raw input.
        layers = [conv_bn(image_channels, width, 2, activation=activation)]

        # Inverted residual stages: only the first block of a stage uses the
        # configured stride, the remaining repeats use stride 1.
        for t, c, n, s in stage_settings:
            stage_width = int(c * widen_factor)
            for repeat in range(n):
                stride = s if repeat == 0 else 1
                layers.append(
                    make_block(width,
                               stage_width,
                               stride,
                               expand_ratio=t,
                               activation=activation))
                width = stage_width

        # Final 1x1 conv expanding to the last channel count.
        layers.append(
            conv_1x1_bn(width, self.last_channel, activation=activation))
        self.features = nn.Sequential(*layers)

        # Classifier is a bare linear layer (no dropout in front of it).
        self.classifier = nn.Linear(self.last_channel, num_classes)
        self.avgpool = nn.AdaptiveAvgPool2d(1)

        self._initialize_weights()
    def __init__(self, rgb=3):
        """Create the layers of the network.

        Args:
            rgb: Channel count of the input; it sizes the input batch norm
                and the first convolution.

        Layers are created and initialised in a fixed order so that random
        initialisation consumes the RNG in the same sequence every time.
        """
        super(HrCNN, self).__init__()
        self.rgb = rgb

        self.ada_avg_pool2d = nn.AdaptiveAvgPool2d(output_size=(192, 128))

        bn_weight_mean = 0
        bn_weight_std = .1
        xavier_gain = 1
        wide_kernel = (15, 10)

        def plain_conv(cin, cout, kernel):
            # Unpadded, stride-1 convolution with PyTorch's default init.
            return nn.Conv2d(cin, cout, kernel_size=kernel, stride=1, padding=0)

        def xavier_conv(cin, cout, kernel):
            # Same convolution, with Xavier-normal initialised weights.
            layer = plain_conv(cin, cout, kernel)
            nn.init.xavier_normal_(layer.weight, gain=xavier_gain)
            return layer

        def noisy_bn(channels):
            # Batch norm whose scale is drawn from N(mean, std).
            layer = nn.BatchNorm2d(channels)
            nn.init.normal_(layer.weight, bn_weight_mean, bn_weight_std)
            return layer

        self.bn_input = noisy_bn(rgb)

        # Stage 00: rgb -> 64 channels; its pool is the only one with stride 2.
        self.conv_00 = xavier_conv(rgb, 64, wide_kernel)
        self.max_pool2d_00 = nn.MaxPool2d(
            kernel_size=wide_kernel,
            stride=(2, 2),
        )
        self.bn_00 = noisy_bn(64)

        # Stage 01: 64 -> 64 channels.
        self.conv_01 = xavier_conv(64, 64, wide_kernel)
        self.max_pool2d_01 = nn.MaxPool2d(kernel_size=wide_kernel,
                                          stride=(1, 1))
        self.bn_01 = noisy_bn(64)

        # Stage 10: 64 -> 128 channels.
        self.conv_10 = xavier_conv(64, 128, wide_kernel)
        self.max_pool2d_10 = nn.MaxPool2d(kernel_size=wide_kernel,
                                          stride=(1, 1))
        self.bn_10 = noisy_bn(128)

        self.gcb = GCBlock(128)

        # Stage 20: 128 -> 128 channels with a (12, 10) kernel. Neither the
        # conv nor the batch norm of this stage gets a custom initialisation.
        self.conv_20 = plain_conv(128, 128, (12, 10))
        self.max_pool2d_20 = nn.MaxPool2d(kernel_size=wide_kernel,
                                          stride=(1, 1))
        self.bn_20 = nn.BatchNorm2d(128)

        # 1x1 projection from 128 channels to a single output channel.
        self.conv_last = xavier_conv(128, 1, 1)

        self.gradients = None
    def __init__(self,
                 output_blocks=(DEFAULT_BLOCK_INDEX,),
                 resize_input=True,
                 normalize_input=True,
                 requires_grad=False,
                 use_fid_inception=True):
        """Build pretrained InceptionV3

        Parameters
        ----------
        output_blocks : iterable of int
            Indices of blocks to return features of. Possible values are:
                - 0: corresponds to output of first max pooling
                - 1: corresponds to output of second max pooling
                - 2: corresponds to output which is fed to aux classifier
                - 3: corresponds to output of final average pooling
            The iterable is consumed exactly once, so generators are fine.
        resize_input : bool
            If true, bilinearly resizes input to width and height 299 before
            feeding input to model. As the network without fully connected
            layers is fully convolutional, it should be able to handle inputs
            of arbitrary size, so resizing might not be strictly needed
        normalize_input : bool
            If true, scales the input from range (0, 1) to the range the
            pretrained Inception network expects, namely (-1, 1)
        requires_grad : bool
            If true, parameters of the model require gradients. Possibly useful
            for finetuning the network
        use_fid_inception : bool
            If true, uses the pretrained Inception model used in Tensorflow's
            FID implementation. If false, uses the pretrained Inception model
            available in torchvision. The FID Inception model has different
            weights and a slightly different structure from torchvision's
            Inception model. If you want to compute FID scores, you are
            strongly advised to set this parameter to true to get comparable
            results.

        Raises
        ------
        ValueError
            If ``output_blocks`` is empty.
        AssertionError
            If the largest requested block index is greater than 3.
        """
        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input

        # Materialise the indices once and derive the maximum from the sorted
        # copy, so a one-shot iterable is not exhausted before it is inspected.
        self.output_blocks = sorted(output_blocks)
        if not self.output_blocks:
            raise ValueError(
                'output_blocks must contain at least one block index')
        self.last_needed_block = self.output_blocks[-1]

        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = nn.ModuleList()

        if use_fid_inception:
            inception = fid_inception_v3()
        else:
            inception = _inception_v3(pretrained=True)

        # Block 0: input to maxpool1 (always built)
        block0 = [
            inception.Conv2d_1a_3x3, inception.Conv2d_2a_3x3,
            inception.Conv2d_2b_3x3,
            nn.MaxPool2d(kernel_size=3, stride=2)
        ]
        self.blocks.append(nn.Sequential(*block0))

        # Block 1: maxpool1 to maxpool2
        if self.last_needed_block >= 1:
            block1 = [
                inception.Conv2d_3b_1x1, inception.Conv2d_4a_3x3,
                nn.MaxPool2d(kernel_size=3, stride=2)
            ]
            self.blocks.append(nn.Sequential(*block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [
                inception.Mixed_5b,
                inception.Mixed_5c,
                inception.Mixed_5d,
                inception.Mixed_6a,
                inception.Mixed_6b,
                inception.Mixed_6c,
                inception.Mixed_6d,
                inception.Mixed_6e,
            ]
            self.blocks.append(nn.Sequential(*block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                inception.Mixed_7a, inception.Mixed_7b, inception.Mixed_7c,
                nn.AdaptiveAvgPool2d(output_size=(1, 1))
            ]
            self.blocks.append(nn.Sequential(*block3))

        # Apply the requested trainability to every registered parameter.
        for param in self.parameters():
            param.requires_grad = requires_grad
Exemple #23
0
    def __init__(self):
        """Create the convolutional stack, pooling layers and 1x1 head.

        The network is ten Conv-BatchNorm-ReLU-Dropout blocks (32, 64 and
        128 channels) interleaved with three 2x2 max-pools, followed by a
        global average pool and a bias-free 1x1 convolution with 10 outputs.
        """
        super(QuizDNN, self).__init__()
        self.dropout_value = 0.15

        def conv_block(cin, cout, k):
            # Bias-free k x k conv with "same" padding for k in {1, 3},
            # followed by batch norm, ReLU and dropout.
            return nn.Sequential(
                nn.Conv2d(cin, cout, kernel_size=(k, k), padding=k // 2,
                          bias=False),
                nn.BatchNorm2d(cout),
                nn.ReLU(),
                nn.Dropout(self.dropout_value),
            )

        # 32-channel group
        self.convblock1 = conv_block(3, 32, 1)
        self.convblock2 = conv_block(32, 32, 3)
        self.convblock3 = conv_block(32, 32, 3)

        self.pool1 = nn.MaxPool2d(2, 2)

        # 64-channel group
        self.convblock4 = conv_block(32, 64, 1)
        self.convblock5 = conv_block(64, 64, 3)
        self.convblock6 = conv_block(64, 64, 3)

        self.pool2 = nn.MaxPool2d(2, 2)

        # 128-channel group
        self.convblock7 = conv_block(64, 128, 1)
        self.convblock8 = conv_block(128, 128, 3)

        self.MP3 = nn.MaxPool2d(2, 2)

        self.convblock9 = conv_block(128, 128, 3)
        self.convblock10 = conv_block(128, 128, 3)

        self.gap = nn.AdaptiveAvgPool2d(output_size=(1, 1))

        # Head: plain 1x1 conv, no norm / activation / dropout.
        self.convblock11 = nn.Sequential(
            nn.Conv2d(128, 10, kernel_size=(1, 1), bias=False),
        )
    def __init__(self, in_channel, out_channel):
        """Create two banks of multi-scale convolutions plus a pooled branch.

        Args:
            in_channel: Input channel count of every convolution created here.
            out_channel: Output channel count of every convolution and the
                channel count of the batch norm.
        """
        super(FPA, self).__init__()

        # Two banks (suffix _1 and _2) of bias-free, stride-1 convolutions
        # with kernel sizes 15, 11, 7 and 3; padding k // 2 keeps the spatial
        # size. Attribute names follow the pattern c<kernel>_<bank>.
        for bank in (1, 2):
            for k in (15, 11, 7, 3):
                conv = nn.Conv2d(in_channel,
                                 out_channel,
                                 kernel_size=k,
                                 stride=1,
                                 padding=k // 2,
                                 bias=False)
                setattr(self, "c{}_{}".format(k, bank), conv)

        # Global-pooling branch: pool to 1x1, then a 1x1 projection.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.c1_gpb = nn.Conv2d(in_channel,
                                out_channel,
                                kernel_size=1,
                                bias=False)

        self.bn = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU(inplace=True)
Exemple #25
0
 def _make_stage(self, features, size):
     """Return a pooling stage: adaptive average pool, then a 1x1 conv.

     Args:
         features: Channel count used for both the input and the output of
             the bias-free 1x1 convolution.
         size: Side length of the square output of the adaptive pool.

     Returns:
         An ``nn.Sequential`` holding the pool followed by the convolution.
     """
     return nn.Sequential(
         nn.AdaptiveAvgPool2d(output_size=(size, size)),
         nn.Conv2d(features, features, kernel_size=1, bias=False),
     )
    def __init__(self,
                 block_units,
                 width_factor,
                 head_size=21843,
                 zero_head=False):
        """Create the root, the four-stage body and the head of the network.

        Args:
            block_units: Indexable with positions 0..3; entry ``k`` is the
                number of bottleneck units in stage ``k + 1``.
            width_factor: Multiplier applied to every channel count.
            head_size: Output channels of the final 1x1 convolution.
            zero_head: Stored on the instance as ``self.zero_head``; it is
                not otherwise used in this method.
        """
        super().__init__()
        wf = width_factor  # shortcut 'cause we'll use it a lot.

        # Root: 7x7 stride-2 conv, explicit zero padding, 3x3 stride-2 pool.
        root = OrderedDict()
        root["conv"] = StdConv2d(3,
                                 64 * wf,
                                 kernel_size=7,
                                 stride=2,
                                 padding=3,
                                 bias=False)
        # Explicit ConstantPad2d followed by an unpadded pool is deliberate:
        # MaxPool2d(kernel_size=3, stride=2, padding=1) is subtly not the same.
        root["pad"] = nn.ConstantPad2d(1, 0)
        root["pool"] = nn.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.root = nn.Sequential(root)

        def make_stage(index, cin, cout, cmid, **first_unit_kwargs):
            # The first unit changes the channel count (and takes any extra
            # kwargs such as stride); the remaining units keep `cout`.
            units = OrderedDict()
            units["unit01"] = PreActBottleneck(cin=cin * wf,
                                               cout=cout * wf,
                                               cmid=cmid * wf,
                                               **first_unit_kwargs)
            for i in range(2, block_units[index] + 1):
                units[f"unit{i:02d}"] = PreActBottleneck(cin=cout * wf,
                                                         cout=cout * wf,
                                                         cmid=cmid * wf)
            return nn.Sequential(units)

        # Body: every stage after the first is built with stride=2 on its
        # first unit; the first stage passes no stride argument.
        body = OrderedDict()
        body["block1"] = make_stage(0, 64, 256, 64)
        body["block2"] = make_stage(1, 256, 512, 128, stride=2)
        body["block3"] = make_stage(2, 512, 1024, 256, stride=2)
        body["block4"] = make_stage(3, 1024, 2048, 512, stride=2)
        self.body = nn.Sequential(body)

        self.zero_head = zero_head

        # Head: group norm, ReLU, global average pool, 1x1 conv classifier.
        head = OrderedDict()
        head["gn"] = nn.GroupNorm(32, 2048 * wf)
        head["relu"] = nn.ReLU(inplace=True)
        head["avg"] = nn.AdaptiveAvgPool2d(output_size=1)
        head["conv"] = nn.Conv2d(2048 * wf,
                                 head_size,
                                 kernel_size=1,
                                 bias=True)
        self.head = nn.Sequential(head)
    def __init__(self,
                 out_planes,
                 is_training,
                 criterion,
                 ohem_criterion,
                 pretrained_model=None,
                 norm_layer=nn.BatchNorm2d):
        """Create the context path, spatial path, fusion module and heads.

        Args:
            out_planes: Passed to every ``BiSeNetHead`` as its second
                argument.
            is_training: When truthy, two extra heads are created and the
                two criteria are stored on the instance; otherwise those
                head slots hold ``None`` and the criteria are not stored.
            criterion: Stored as ``self.criterion`` when ``is_training``.
            ohem_criterion: Stored as ``self.ohem_criterion`` when
                ``is_training``.
            pretrained_model: Forwarded to ``xception39``.
            norm_layer: Normalisation layer class forwarded to every
                sub-module built here.
        """
        super(BiSeNet, self).__init__()
        self.context_path = xception39(pretrained_model, norm_layer=norm_layer)

        self.business_layer = []
        self.is_training = is_training

        self.spatial_path = SpatialPath(3, 128, norm_layer)

        conv_channel = 128
        fused_channel = conv_channel * 2

        def conv_bn_relu(cin, ksize, stride, pad):
            # Bias-free conv + norm + ReLU with `conv_channel` outputs.
            return ConvBnRelu(cin,
                              conv_channel,
                              ksize,
                              stride,
                              pad,
                              has_bn=True,
                              has_relu=True,
                              has_bias=False,
                              norm_layer=norm_layer)

        # Global context: pool to 1x1, then a 1x1 conv from 256 channels.
        self.global_context = nn.Sequential(nn.AdaptiveAvgPool2d(1),
                                            conv_bn_relu(256, 1, 1, 0))

        # stage = [256, 128, 64]
        arms = [
            AttentionRefinement(stage_channels, conv_channel, norm_layer)
            for stage_channels in (256, 128)
        ]
        refines = [conv_bn_relu(conv_channel, 3, 1, 1) for _ in range(2)]

        # The first two head slots are only populated in training mode; the
        # last slot always holds the head that takes `fused_channel` inputs.
        if is_training:
            leading_heads = [
                BiSeNetHead(conv_channel, out_planes, scale, True, norm_layer)
                for scale in (2, 1)
            ]
        else:
            leading_heads = [None, None]
        heads = leading_heads + [
            BiSeNetHead(fused_channel, out_planes, 1, False, norm_layer)
        ]

        self.ffm = FeatureFusion(fused_channel, fused_channel, 1, norm_layer)

        self.arms = nn.ModuleList(arms)
        self.refines = nn.ModuleList(refines)
        self.heads = nn.ModuleList(heads)

        # Everything built here except the context path.
        self.business_layer.extend([
            self.spatial_path,
            self.global_context,
            self.arms,
            self.refines,
            self.heads,
            self.ffm,
        ])

        if is_training:
            self.criterion = criterion
            self.ohem_criterion = ohem_criterion
    def __init__(self,
                 net_type='mixnet_m',
                 input_size=32,
                 num_classes=100,
                 stem_channels=16,
                 feature_size=1536,
                 depth_multiplier=1.0):
        """Build the stem, MixNet block stack and classification head.

        Args:
            net_type: ``'mixnet_s'``, ``'mixnet_m'`` or ``'mixnet_l'``.
                ``'mixnet_l'`` reuses the ``mixnet_m`` block table and
                multiplies ``depth_multiplier`` by 1.3.
            input_size: Must be a multiple of 32 (checked with ``assert``);
                it is not used for anything else in this constructor.
            num_classes: Output features of the final linear classifier.
            stem_channels: Accepted for interface compatibility only; the
                value is always replaced by the per-variant preset
                (16 for ``mixnet_s``, 24 for ``mixnet_m``/``mixnet_l``).
            feature_size: Output channels of the 1x1 head convolution and
                input features of the classifier.
            depth_multiplier: When not equal to 1.0, the stem width and the
                first two entries (in/out channels) of every block config
                are scaled by this factor and passed through
                ``_RoundChannels``.

        Raises:
            TypeError: If ``net_type`` is not one of the supported names.
            AssertionError: If ``input_size`` is not a multiple of 32.
        """
        super(MixNet, self).__init__()

        if net_type == 'mixnet_s':
            base_config = self.mixnet_s
            stem_channels = 16
            dropout_rate = 0.2
        elif net_type == 'mixnet_m':
            base_config = self.mixnet_m
            stem_channels = 24
            dropout_rate = 0.25
        elif net_type == 'mixnet_l':
            base_config = self.mixnet_m
            stem_channels = 24
            depth_multiplier *= 1.3
            dropout_rate = 0.25
        else:
            raise TypeError('Unsupported MixNet type')

        assert input_size % 32 == 0

        # Channel scaling.  The block table is looked up on ``self`` before
        # any instance attribute exists, so it is shared state; build a new
        # scaled list instead of assigning into it, otherwise every scaled
        # model constructed would rescale the table again for all later
        # instances.
        if depth_multiplier != 1.0:
            stem_channels = _RoundChannels(stem_channels * depth_multiplier)
            config = [
                (_RoundChannels(conf[0] * depth_multiplier),
                 _RoundChannels(conf[1] * depth_multiplier)) + tuple(conf[2:])
                for conf in base_config
            ]
        else:
            config = base_config

        # stem convolution
        self.stem_conv = Conv3x3Bn(3, stem_channels, 1)

        # building MixNet blocks, one per config row
        layers = []
        for (in_channels, out_channels, kernel_size, expand_ksize,
             project_ksize, stride, expand_ratio, non_linear,
             se_ratio) in config:
            layers.append(
                MixNetBlock(in_channels,
                            out_channels,
                            kernel_size=kernel_size,
                            expand_ksize=expand_ksize,
                            project_ksize=project_ksize,
                            stride=stride,
                            expand_ratio=expand_ratio,
                            non_linear=non_linear,
                            se_ratio=se_ratio))
        self.layers = nn.Sequential(*layers)

        # head: widen the last block's output channels to ``feature_size``
        self.head_conv = Conv1x1Bn(config[-1][1], feature_size)

        # Adaptive pooling always yields a 1x1 map, so the head does not
        # depend on the spatial size reaching it.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(dropout_rate)
        self.classifier = nn.Linear(feature_size, num_classes)

        self._initialize_weights()
 def __init__(self, c_in, c_out):
     """Create the global-pooling layer and the conv block that follows it.

     ``c_in`` and ``c_out`` are forwarded as the first two arguments of
     ``_ConvBnReLU``.
     """
     super(_ImagePool, self).__init__()
     # Adaptive pooling to a single spatial position.
     self.pool = nn.AdaptiveAvgPool2d(output_size=1)
     # NOTE(review): the trailing arguments look like kernel size, stride,
     # padding and dilation of a 1x1 convolution; confirm against
     # _ConvBnReLU's signature.
     conv_args = (1, 1, 0, 1)
     self.conv = _ConvBnReLU(c_in, c_out, *conv_args)
Exemple #30
0
 def __init__(self,
              block,
              layers,
              groups,
              reduction,
              dropout_p=0.2,
              inplanes=128,
              input_3x3=True,
              downsample_kernel_size=3,
              downsample_padding=1,
              num_classes=1000):
     """Build the stem (``layer0``), four residual stages and the head.

     Arguments
     ---------
     block : nn.Module class
         Bottleneck type.
         SENet154: SEBottleneck; SE-ResNet: SEResNetBottleneck;
         SE-ResNeXt: SEResNeXtBottleneck.
     layers : list of int
         Number of residual blocks in each of layer1 .. layer4.
     groups : int
         Group count of the 3x3 convolution inside every bottleneck.
         SENet154: 64; SE-ResNet: 1; SE-ResNeXt: 32.
     reduction : int
         Reduction ratio of the Squeeze-and-Excitation modules
         (16 for all models).
     dropout_p : float or None
         Drop probability of the Dropout layer; ``None`` means no
         Dropout layer is created.
         SENet154: 0.2; SE-ResNet: None; SE-ResNeXt: None.
     inplanes : int
         Number of input channels for layer1.
         SENet154: 128; SE-ResNet: 64; SE-ResNeXt: 64.
     input_3x3 : bool
         If true, layer0 uses three 3x3 convolutions in place of a
         single 7x7 convolution.
         SENet154: True; SE-ResNet: False; SE-ResNeXt: False.
     downsample_kernel_size : int
         Kernel size of the downsampling convolutions in layer2,
         layer3 and layer4.
         SENet154: 3; SE-ResNet: 1; SE-ResNeXt: 1.
     downsample_padding : int
         Padding of the downsampling convolutions in layer2, layer3
         and layer4.
         SENet154: 1; SE-ResNet: 0; SE-ResNeXt: 0.
     num_classes : int
         Number of outputs of the ``last_linear`` layer
         (1000 for all models).
     """
     super(SENet, self).__init__()
     # NOTE(review): presumably read (and advanced) by _make_layer while
     # the stages are built; confirm against that method.
     self.inplanes = inplanes

     # ---- layer0: stem -------------------------------------------------
     if input_3x3:
         # Three stacked 3x3 convolutions; only the first one is strided.
         stem = [
             ('conv1', nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False)),
             ('bn1', nn.BatchNorm2d(64)),
             ('relu1', nn.ReLU(inplace=True)),
             ('conv2', nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False)),
             ('bn2', nn.BatchNorm2d(64)),
             ('relu2', nn.ReLU(inplace=True)),
             ('conv3', nn.Conv2d(64, inplanes, 3, stride=1, padding=1,
                                 bias=False)),
             ('bn3', nn.BatchNorm2d(inplanes)),
             ('relu3', nn.ReLU(inplace=True)),
         ]
     else:
         # Single strided 7x7 convolution.
         stem = [
             ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2,
                                 padding=3, bias=False)),
             ('bn1', nn.BatchNorm2d(inplanes)),
             ('relu1', nn.ReLU(inplace=True)),
         ]
     # `ceil_mode=True` rather than `padding=1` keeps the pooling
     # compatible with the Caffe weights.
     stem.append(('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)))
     self.layer0 = nn.Sequential(OrderedDict(stem))

     # ---- layer1 .. layer4: residual stages ----------------------------
     # Arguments common to every stage.
     se_kwargs = dict(groups=groups, reduction=reduction)
     # layer1 passes no stride and always uses a 1x1, unpadded downsample.
     self.layer1 = self._make_layer(block,
                                    planes=64,
                                    blocks=layers[0],
                                    downsample_kernel_size=1,
                                    downsample_padding=0,
                                    **se_kwargs)
     # The remaining stages use stride 2 and the caller-selected
     # downsampling convolution geometry.
     strided_kwargs = dict(se_kwargs,
                           stride=2,
                           downsample_kernel_size=downsample_kernel_size,
                           downsample_padding=downsample_padding)
     self.layer2 = self._make_layer(block, planes=128, blocks=layers[1],
                                    **strided_kwargs)
     self.layer3 = self._make_layer(block, planes=256, blocks=layers[2],
                                    **strided_kwargs)
     self.layer4 = self._make_layer(block, planes=512, blocks=layers[3],
                                    **strided_kwargs)

     # ---- head ---------------------------------------------------------
     # Adaptive pooling to 1x1.
     self.avg_pool = nn.AdaptiveAvgPool2d((1, 1))
     if dropout_p is not None:
         self.dropout = nn.Dropout(dropout_p)
     else:
         self.dropout = None
     self.last_linear = nn.Linear(512 * block.expansion, num_classes)