Exemple #1
0
    def __init__(self,
                 input_size=224,
                 n_class=1000,
                 architecture=None,
                 model_size='Large'):
        """Build the network from a per-block architecture description.

        Args:
            input_size: Input resolution; must be a multiple of 32.
            n_class: Output width of the final linear classifier.
            architecture: Indexable with one entry per block (20 entries for
                the ``[4, 4, 8, 4]`` stage layout). Entries 0/1/2 select a
                ``Shufflenet`` block with kernel size 3/5/7, entry 3 selects
                a ``Shuffle_Xception`` block.
            model_size: ``'Large'``, ``'Medium'`` or ``'Small'``; selects the
                per-stage channel widths.

        Raises:
            AssertionError: If ``input_size`` is not a multiple of 32,
                ``architecture`` is None, or ``architecture`` has more
                entries than blocks were built.
            NotImplementedError: For an unknown ``model_size`` or an
                architecture entry outside 0..3.
        """
        super(ShuffleNetV2_Plus, self).__init__()

        print('model size is ', model_size)

        assert input_size % 32 == 0
        assert architecture is not None

        self.stage_repeats = [4, 4, 8, 4]
        if model_size == 'Large':
            self.stage_out_channels = [-1, 16, 68, 168, 336, 672, 1280]
        elif model_size == 'Medium':
            self.stage_out_channels = [-1, 16, 48, 128, 256, 512, 1280]
        elif model_size == 'Small':
            self.stage_out_channels = [-1, 16, 36, 104, 208, 416, 1280]
        else:
            raise NotImplementedError

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
            nn.BatchNorm2d(input_channel),
            HS(),
        )

        self.features = []
        archIndex = 0
        for idxstage in range(len(self.stage_repeats)):
            numrepeat = self.stage_repeats[idxstage]
            output_channel = self.stage_out_channels[idxstage + 2]

            # The first stage uses ReLU, later stages use HS; SE is switched
            # on from the third stage onwards.
            activation = 'HS' if idxstage >= 1 else 'ReLU'
            # NOTE(review): the truthy value is the *string* 'True', not the
            # bool. Kept as-is because the block implementation is not
            # visible here -- confirm it only tests truthiness.
            useSE = 'True' if idxstage >= 2 else False

            for i in range(numrepeat):
                # The first block of a stage takes the full input width with
                # stride 2; the remaining blocks get half the channels and
                # keep the resolution.
                if i == 0:
                    inp, outp, stride = input_channel, output_channel, 2
                else:
                    inp, outp, stride = input_channel // 2, output_channel, 1

                blockIndex = architecture[archIndex]
                archIndex += 1
                if blockIndex == 0:
                    label, ksize = 'Shuffle3x3', 3
                elif blockIndex == 1:
                    label, ksize = 'Shuffle5x5', 5
                elif blockIndex == 2:
                    label, ksize = 'Shuffle7x7', 7
                elif blockIndex == 3:
                    label, ksize = 'Xception', None
                else:
                    raise NotImplementedError

                print(label)
                if ksize is None:
                    layer = Shuffle_Xception(inp,
                                             outp,
                                             base_mid_channels=outp // 2,
                                             stride=stride,
                                             activation=activation,
                                             useSE=useSE)
                else:
                    layer = Shufflenet(inp,
                                       outp,
                                       base_mid_channels=outp // 2,
                                       ksize=ksize,
                                       stride=stride,
                                       activation=activation,
                                       useSE=useSE)
                self.features.append(layer)
                input_channel = output_channel
        assert archIndex == len(architecture)
        self.features = nn.Sequential(*self.features)

        self.conv_last = nn.Sequential(
            nn.Conv2d(input_channel, 1280, 1, 1, 0, bias=False),
            nn.BatchNorm2d(1280), HS())
        # One stride-2 stem conv plus one stride-2 block in each of the four
        # stages is five stride-2 layers, i.e. a factor of 32 when each block
        # applies its stride, so the pooling window follows input_size
        # instead of being fixed at 7 (identical for the default 224).
        self.globalpool = nn.AvgPool2d(input_size // 32)
        self.LastSE = SELayer(1280)
        self.fc = nn.Sequential(
            nn.Linear(1280, 1280, bias=False),
            HS(),
        )
        self.dropout = nn.Dropout(0.2)
        self.classifier = nn.Sequential(nn.Linear(1280, n_class, bias=False))
        self._initialize_weights()
    def __init__(self,
                 input_size=224,
                 n_class=1000,
                 architecture=None,
                 channels_scales=None):
        """Assemble a fixed network from block choices and width multipliers.

        Args:
            input_size: Input resolution; must be a multiple of 32.
            n_class: Output width of the final linear classifier.
            architecture: One entry per block; 0/1/2 pick a ``Shufflenet``
                block with kernel size 3/5/7 and 3 picks ``Shuffle_Xception``.
            channels_scales: One multiplier per block, applied to half of the
                block's output channels to obtain its mid-channel count.

        Raises:
            AssertionError: On an invalid ``input_size``, a missing argument,
                or an ``architecture`` longer than the number of blocks.
            NotImplementedError: For an architecture entry outside 0..3.
        """
        super(ShuffleNetV2_OneShot, self).__init__()

        assert input_size % 32 == 0
        assert architecture is not None and channels_scales is not None

        self.stage_repeats = [4, 4, 8, 4]
        self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

        # Stem: 3x3 stride-2 convolution followed by BN and ReLU.
        input_channel = self.stage_out_channels[1]
        stem_layers = [
            nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
            nn.BatchNorm2d(input_channel),
            nn.ReLU(inplace=True),
        ]
        self.first_conv = nn.Sequential(*stem_layers)

        layers = []
        archIndex = 0
        for stage_no, numrepeat in enumerate(self.stage_repeats):
            output_channel = self.stage_out_channels[stage_no + 2]

            for block_no in range(numrepeat):
                # Only the stage's first block sees the full channel count
                # and downsamples; the others get half the channels.
                opens_stage = block_no == 0
                inp = input_channel if opens_stage else input_channel // 2
                stride = 2 if opens_stage else 1
                outp = output_channel

                blockIndex = architecture[archIndex]
                mid_channels = int((outp // 2) * channels_scales[archIndex])
                archIndex += 1

                if blockIndex == 0:
                    label, ksize = 'Shuffle3x3', 3
                elif blockIndex == 1:
                    label, ksize = 'Shuffle5x5', 5
                elif blockIndex == 2:
                    label, ksize = 'Shuffle7x7', 7
                elif blockIndex == 3:
                    label, ksize = 'Xception', None
                else:
                    raise NotImplementedError

                print(label)
                if ksize is None:
                    layer = Shuffle_Xception(inp,
                                             outp,
                                             mid_channels=mid_channels,
                                             stride=stride)
                else:
                    layer = Shufflenet(inp,
                                       outp,
                                       mid_channels=mid_channels,
                                       ksize=ksize,
                                       stride=stride)
                layers.append(layer)
                input_channel = output_channel

        assert archIndex == len(architecture)
        self.features = nn.Sequential(*layers)

        # Head: 1x1 conv to the final width, pooling, dropout, classifier.
        head_width = self.stage_out_channels[-1]
        self.conv_last = nn.Sequential(
            nn.Conv2d(input_channel, head_width, 1, 1, 0, bias=False),
            nn.BatchNorm2d(head_width),
            nn.ReLU(inplace=True),
        )
        self.globalpool = nn.AvgPool2d(int(input_size / 32))
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Sequential(
            nn.Linear(head_width, n_class, bias=False))
        self._initialize_weights()
Exemple #3
0
    def __init__(self, input_size=224, n_class=1000, args=None, architecture=None, channels_scales=None):
        """Build a network holding all four candidate blocks per position.

        Each of the 20 block positions gets a ``ModuleList`` with three
        ``Shufflenet`` blocks (kernel 3/5/7) and one ``Shuffle_Xception``
        block. A learnable ``log_alpha`` of shape
        ``(sum(stage_repeats), num_blocks)`` is created on the CUDA device.

        Args:
            input_size: Input resolution; must be a multiple of 32.
            n_class: Output width of the final linear classifier.
            args: Options object. Read here: ``bn_affine``, ``bn_eps``,
                ``flops_loss``, ``loc_mean``, ``loc_std``, ``early_fix_arch``.
            architecture: Must not be None; not otherwise used here.
            channels_scales: One multiplier per block position, applied to
                half of the block's output channels.

        Raises:
            AssertionError: If ``input_size`` is not a multiple of 32 or any
                of ``args``, ``architecture``, ``channels_scales`` is None.
        """
        super(ShuffleNetV2_OneShot, self).__init__()

        assert input_size % 32 == 0
        assert architecture is not None and channels_scales is not None
        # args is dereferenced unconditionally below; fail with a clear
        # assertion instead of an AttributeError on None.
        assert args is not None

        self.stage_repeats = [4, 4, 8, 4]
        self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

        self.args = args
        self.bn_affine = args.bn_affine
        self.bn_eps = args.bn_eps
        self.num_blocks = 4
        self.device = torch.device("cuda")
        if args.flops_loss:
            # One row per block position, one column per candidate block,
            # scaled down by 1e6 (presumably FLOPs -> MFLOPs; verify).
            self.flops = torch.Tensor([[13396992., 15805440., 19418112., 13146112.],
            [ 7325696.,  8931328., 11339776., 12343296.],
            [ 7325696.,  8931328., 11339776., 12343296.],
            [ 7325696.,  8931328., 11339776., 12343296.],
            [26304768., 28111104., 30820608., 20296192.],
            [10599680., 11603200., 13108480., 16746240.],
            [10599680., 11603200., 13108480., 16746240.],
            [10599680., 11603200., 13108480., 16746240.],
            [30670080., 31673600., 33178880., 21199360.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [30387840., 30889600., 31642240., 20634880.],
            [10176320., 10427200., 10803520., 15476160.],
            [10176320., 10427200., 10803520., 15476160.],
            [10176320., 10427200., 10803520., 15476160.]]).cuda()/1000000

        # One row per block position, one column per candidate block.
        self.log_alpha = torch.nn.Parameter(
                torch.zeros(sum(self.stage_repeats), self.num_blocks).normal_(self.args.loc_mean, self.args.loc_std).cuda().requires_grad_())

        self._arch_parameters = [self.log_alpha]
        self.weights = Variable(torch.zeros_like(self.log_alpha))
        if self.args.early_fix_arch:
            self.fix_arch_index = {}

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
            nn.BatchNorm2d(input_channel, eps=self.bn_eps),
            nn.ReLU(inplace=True),
        )

        self.features = nn.ModuleList()
        archIndex = 0
        for idxstage in range(len(self.stage_repeats)):
            numrepeat = self.stage_repeats[idxstage]
            output_channel = self.stage_out_channels[idxstage+2]

            for i in range(numrepeat):
                # The first block of a stage downsamples and takes the full
                # channel count; the rest take half the channels.
                if i == 0:
                    inp, outp, stride = input_channel, output_channel, 2
                else:
                    inp, outp, stride = input_channel // 2, output_channel, 1

                base_mid_channels = outp // 2
                mid_channels = int(base_mid_channels * channels_scales[archIndex])
                archIndex += 1

                # All four candidates are instantiated for this position.
                blocks = nn.ModuleList()
                blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=3, stride=stride, bn_affine=self.bn_affine, bn_eps=self.bn_eps))
                blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=5, stride=stride, bn_affine=self.bn_affine, bn_eps=self.bn_eps))
                blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=7, stride=stride, bn_affine=self.bn_affine, bn_eps=self.bn_eps))
                blocks.append(Shuffle_Xception(inp, outp, mid_channels=mid_channels, stride=stride, bn_affine=self.bn_affine, bn_eps=self.bn_eps))

                input_channel = output_channel
                self.features.append(blocks)

        self.conv_last = nn.Sequential(
            nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
            nn.BatchNorm2d(self.stage_out_channels[-1], eps=self.bn_eps),
            nn.ReLU(inplace=True),
        )
        # The stem and the first block of each of the four stages use
        # stride 2 (a factor of 32 when each block applies its stride), so
        # the pooling window follows input_size rather than being fixed at
        # 7 (same value for the default 224).
        self.globalpool = nn.AvgPool2d(input_size // 32)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Sequential(nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
        self._initialize_weights()
    def __init__(self, input_size=224, n_class=1000):
        """Build a network that keeps all four candidate blocks per position.

        ``self.features`` is a ``ModuleList`` with one inner ``ModuleList``
        per block position; each inner list holds, in order, ``Shufflenet``
        blocks with kernel size 3, 5 and 7 and one ``Shuffle_Xception``
        block. The stem and the last conv use BatchNorm with ``affine=False``.

        Args:
            input_size: Input resolution; must be a multiple of 32.
            n_class: Output width of the final linear classifier.

        Raises:
            AssertionError: If ``input_size`` is not a multiple of 32.
        """
        super(ShuffleNetV2_OneShot, self).__init__()

        assert input_size % 32 == 0

        self.stage_repeats = [4, 4, 8, 4]
        self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
            nn.BatchNorm2d(input_channel, affine=False),
            nn.ReLU(inplace=True),
        )

        self.features = torch.nn.ModuleList()
        archIndex = 0
        for idxstage in range(len(self.stage_repeats)):
            numrepeat = self.stage_repeats[idxstage]
            output_channel = self.stage_out_channels[idxstage + 2]

            for i in range(numrepeat):
                # The first block of a stage downsamples and takes the full
                # channel count; the rest take half the channels.
                if i == 0:
                    inp, outp, stride = input_channel, output_channel, 2
                else:
                    inp, outp, stride = input_channel // 2, output_channel, 1

                mid_channels = int(outp // 2)
                archIndex += 1

                # Candidate order matters: index 0/1/2 are the 3x3/5x5/7x7
                # Shufflenet blocks, index 3 is the Xception block.
                choices = torch.nn.ModuleList()
                self.features.append(choices)
                for ksize in (3, 5, 7):
                    choices.append(
                        Shufflenet(inp,
                                   outp,
                                   mid_channels=mid_channels,
                                   ksize=ksize,
                                   stride=stride))
                choices.append(
                    Shuffle_Xception(inp,
                                     outp,
                                     mid_channels=mid_channels,
                                     stride=stride))
                input_channel = output_channel

        # Number of block positions built above.
        self.archLen = archIndex

        self.conv_last = nn.Sequential(
            nn.Conv2d(input_channel,
                      self.stage_out_channels[-1],
                      1,
                      1,
                      0,
                      bias=False),
            nn.BatchNorm2d(self.stage_out_channels[-1], affine=False),
            nn.ReLU(inplace=True),
        )
        # The stem and the first block of each of the four stages use
        # stride 2 (a factor of 32 when each block applies its stride), so
        # the pooling window follows input_size rather than being fixed at
        # 7 (same value for the default 224).
        self.globalpool = nn.AvgPool2d(input_size // 32)
        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Sequential(
            nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
        self._initialize_weights()
Exemple #5
0
    def __init__(self, input_size=32, block=5, n_class=10):
        """Build the all-candidates network in a 20-block or 5-block layout.

        ``self.features`` holds one inner ``ModuleList`` per block position,
        each with ``Shufflenet`` blocks of kernel size 3, 5, 7 followed by a
        ``Shuffle_Xception`` block.

        Args:
            input_size: Input resolution; must be a multiple of 32.
            block: Layout selector. ``20`` uses stage repeats ``[4, 4, 8, 4]``
                with a stride-2 stem and stride 2 in every stage; ``5`` uses
                repeats ``[1, 1, 2, 1]`` with a stride-1 stem and stride 2
                only in the last two stages.
            n_class: Output width of the final linear classifier.

        Raises:
            AssertionError: If ``input_size`` is not a multiple of 32.
            NotImplementedError: If ``block`` is neither 20 nor 5.
        """
        super(ShuffleNetV2_OneShot_cifar, self).__init__()

        assert input_size % 32 == 0

        if block == 20:
            self.stage_repeats = [4, 4, 8, 4]  # imagenet layer20
            self.stage_strides = [2, 2, 2, 2]  # every stage downsample
            # width:channel
            # 3-16, (16-64, 64-160, 160-320, 320-640), 640-1000
            self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
            first_stride = 2
        elif block == 5:
            self.stage_repeats = [1, 1, 2, 1]  # cifar layer5
            self.stage_strides = [1, 1, 2, 2]  # downsample
            # width:channel
            # 3-16, (16-16, 16-16, 16-32, 32-64), 64-10
            self.stage_out_channels = [-1, 16, 16, 16, 32, 64, 128]
            first_stride = 1
        else:
            # Previously an unknown layout fell through and failed later
            # with an AttributeError on self.stage_repeats.
            raise NotImplementedError

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, input_channel, 3, first_stride, 1, bias=False),
            nn.BatchNorm2d(input_channel, affine=False),
            nn.ReLU(inplace=True),
        )

        self.features = torch.nn.ModuleList()
        archIndex = 0
        for idxstage in range(len(self.stage_repeats)):
            numrepeat = self.stage_repeats[idxstage]
            output_channel = self.stage_out_channels[idxstage + 2]
            stage_stride = self.stage_strides[idxstage]

            for i in range(numrepeat):
                # Only a stage-opening block with stride 2 takes the full
                # channel count; every other block gets half the channels
                # and keeps the resolution.
                if i == 0 and stage_stride == 2:
                    inp, outp, stride = input_channel, output_channel, 2
                else:
                    inp, outp, stride = input_channel // 2, output_channel, 1

                mid_channels = int(outp // 2)
                archIndex += 1  # running count of block positions

                # Candidate order matters: index 0/1/2 are the 3x3/5x5/7x7
                # Shufflenet blocks, index 3 is the Xception block.
                choices = torch.nn.ModuleList()
                self.features.append(choices)
                for ksize in (3, 5, 7):
                    choices.append(
                        Shufflenet(inp,
                                   outp,
                                   mid_channels=mid_channels,
                                   ksize=ksize,
                                   stride=stride))
                choices.append(
                    Shuffle_Xception(inp,
                                     outp,
                                     mid_channels=mid_channels,
                                     stride=stride))
                input_channel = output_channel

        # Number of block positions built above.
        self.archLen = archIndex

        self.conv_last = nn.Sequential(
            nn.Conv2d(input_channel,
                      self.stage_out_channels[-1],
                      1,
                      1,
                      0,
                      bias=False),
            nn.BatchNorm2d(self.stage_out_channels[-1], affine=False),
            nn.ReLU(inplace=True),
        )
        if block == 20:
            # This layout has five stride-2 steps (stem plus one per stage),
            # a factor of 32 when each block applies its stride. A fixed 7x7
            # window only fits 224 inputs and is larger than the 1x1 map
            # produced by the default input_size of 32.
            self.globalpool = nn.AvgPool2d(input_size // 32)
        else:
            self.globalpool = nn.AdaptiveAvgPool2d((1, 1))

        self.dropout = nn.Dropout(0.1)
        self.classifier = nn.Sequential(
            nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
        self._initialize_weights()
    def __init__(self, input_size=224, n_class=1000, args=None, architecture=None, channels_scales=None, weights=None):
        """Build a network that instantiates only the selected block per position.

        For every block position the column of ``weights`` equal to 1 picks
        the candidate (0/1/2: ``Shufflenet`` with kernel 3/5/7, 3:
        ``Shuffle_Xception``). The position's ``ModuleList`` keeps four
        slots; the unselected slots hold ``None``.

        Args:
            input_size: Input resolution; must be a multiple of 32.
            n_class: Not used in the visible part of this constructor.
            args: Options object. Read here: ``bn_affine``, ``bn_eps``,
                ``loc_mean``, ``loc_std``.
            architecture: Must not be None; not otherwise used here.
            channels_scales: One multiplier per block position, applied to
                half of the block's output channels.
            weights: 2-D tensor indexed ``[position, candidate]``; each row
                must contain exactly one entry equal to 1 (``.item()`` on
                the nonzero result fails otherwise).

        Raises:
            AssertionError: If ``input_size`` is not a multiple of 32 or any
                of the other required arguments is None.
        """
        super(ShuffleNetV2_OneShot, self).__init__()

        assert input_size % 32 == 0
        assert architecture is not None and channels_scales is not None
        # Both are dereferenced unconditionally below.
        assert args is not None and weights is not None

        self.stage_repeats = [4, 4, 8, 4]
        self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

        self.args = args
        self.bn_affine = args.bn_affine
        self.bn_eps = args.bn_eps
        self.num_blocks = 4
        self.device = torch.device("cuda")

        # One row per block position, one column per candidate block.
        self.log_alpha = torch.nn.Parameter(
                torch.zeros(sum(self.stage_repeats), self.num_blocks).normal_(self.args.loc_mean, self.args.loc_std).cuda().requires_grad_())

        self._arch_parameters = [self.log_alpha]
        self.weights = weights

        # building first layer
        input_channel = self.stage_out_channels[1]
        self.first_conv = nn.Sequential(
            nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
            nn.BatchNorm2d(input_channel, eps=self.bn_eps),
            nn.ReLU(inplace=True),
        )

        self.features = nn.ModuleList()
        archIndex = 0
        for idxstage in range(len(self.stage_repeats)):
            numrepeat = self.stage_repeats[idxstage]
            output_channel = self.stage_out_channels[idxstage+2]

            for i in range(numrepeat):
                # The first block of a stage downsamples and takes the full
                # channel count; the rest take half the channels.
                if i == 0:
                    inp, outp, stride = input_channel, output_channel, 2
                else:
                    inp, outp, stride = input_channel // 2, output_channel, 1

                base_mid_channels = outp // 2
                mid_channels = int(base_mid_channels * channels_scales[archIndex])
                # Column index of the single entry equal to 1 in this row.
                pos = (self.weights[archIndex,:] == 1).nonzero().item()
                archIndex += 1

                # Keep four slots per position so candidate indices stay
                # stable; only the selected slot holds a real module.
                # bn_eps belongs to the block constructor -- it was
                # previously passed to ModuleList.append by mistake.
                blocks = nn.ModuleList()
                for slot in range(self.num_blocks):
                    if slot != pos:
                        blocks.append(None)
                    elif slot == 3:
                        blocks.append(Shuffle_Xception(inp, outp, mid_channels=mid_channels, stride=stride, bn_affine=self.bn_affine, bn_eps=self.bn_eps))
                    else:
                        blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=(3, 5, 7)[slot], stride=stride, bn_affine=self.bn_affine, bn_eps=self.bn_eps))

                input_channel = output_channel
                self.features.append(blocks)

        self.conv_last = nn.Sequential(
            nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
            nn.BatchNorm2d(self.stage_out_channels[-1], eps=self.bn_eps),
            nn.ReLU(inplace=True),
        )
        # NOTE(review): the original snippet is cut off inside the call
        # above. The other constructors in this file go on to define
        # globalpool, dropout and classifier and call _initialize_weights();
        # restore that tail from the full source -- TODO confirm.
    def __init__(self,
                 input_size=224,
                 n_class=1000,
                 architecture=None,
                 channels_idx=None,
                 act_type='relu',
                 search=False):
        """Assemble the Gluon network from block choices and scale indices.

        Args:
            input_size: Input resolution; must be a multiple of 32.
            n_class: Number of units of the final dense layer.
            architecture: One entry per block; 0/1/2 pick a ``Shufflenet``
                block with kernel size 3/5/7 and 3 picks ``Shuffle_Xception``.
            channels_idx: One index per block into ``candidate_scales``; the
                selected scale multiplies half of the block's output
                channels to give its mid-channel count.
            act_type: Activation used after the first and the last
                convolution. The inner blocks are always built with
                ``act_type='relu'``.
            search: Stored on the instance and forwarded to every block.

        Raises:
            AssertionError: On an invalid ``input_size``, a missing argument,
                or an ``architecture`` longer than the number of blocks.
            NotImplementedError: For an architecture entry outside 0..3.
        """
        super(ShuffleNetV2_OneShot, self).__init__()

        assert input_size % 32 == 0
        assert architecture is not None and channels_idx is not None
        self.stage_repeats = [4, 4, 8, 4]
        self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
        self.candidate_scales = [
            0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0
        ]
        input_channel = self.stage_out_channels[1]
        self.search = search

        # Stem: 3x3 stride-2 convolution, batch norm, activation.
        self.first_conv = nn.HybridSequential(prefix='first_')
        stem_conv = nn.Conv2D(input_channel,
                              in_channels=3,
                              kernel_size=3,
                              strides=2,
                              padding=1,
                              use_bias=False)
        self.first_conv.add(stem_conv)
        stem_bn = nn.BatchNorm(in_channels=input_channel, momentum=0.1)
        self.first_conv.add(stem_bn)
        self.first_conv.add(Activation(act_type))

        self.features = nn.HybridSequential(prefix='features_')
        archIndex = 0
        for stage_no, numrepeat in enumerate(self.stage_repeats):
            output_channel = self.stage_out_channels[stage_no + 2]

            for block_no in range(numrepeat):
                # Input and output widths are the same for every block of a
                # stage; only the stage's first block uses stride 2.
                inp, outp = input_channel, output_channel
                stride = 2 if block_no == 0 else 1

                blockIndex = architecture[archIndex]
                scale = self.candidate_scales[channels_idx[archIndex]]
                mid_channels = int((outp // 2) * scale)
                archIndex += 1

                # Each block lives in its own un-prefixed container.
                cell = nn.HybridSequential(prefix='')
                self.features.add(cell)

                if blockIndex == 0:
                    ksize = 3
                elif blockIndex == 1:
                    ksize = 5
                elif blockIndex == 2:
                    ksize = 7
                elif blockIndex == 3:
                    ksize = None
                else:
                    raise NotImplementedError

                shared_kwargs = dict(mid_channels=mid_channels,
                                     stride=stride,
                                     act_type='relu',
                                     BatchNorm=nn.BatchNorm,
                                     search=self.search)
                if ksize is None:
                    cell.add(Shuffle_Xception(inp, outp, **shared_kwargs))
                else:
                    cell.add(
                        Shufflenet(inp, outp, ksize=ksize, **shared_kwargs))
                input_channel = output_channel
        assert archIndex == len(architecture)

        # Head: 1x1 convolution to the final width, batch norm, activation.
        head_width = self.stage_out_channels[-1]
        self.conv_last = nn.HybridSequential(prefix='last_')
        head_conv = nn.Conv2D(head_width,
                              in_channels=input_channel,
                              kernel_size=1,
                              strides=1,
                              padding=0,
                              use_bias=False)
        self.conv_last.add(head_conv)
        head_bn = nn.BatchNorm(in_channels=head_width, momentum=0.1)
        self.conv_last.add(head_bn)
        self.conv_last.add(Activation(act_type))

        self.globalpool = nn.GlobalAvgPool2D()
        self.output = nn.HybridSequential(prefix='output_')
        with self.output.name_scope():
            self.output.add(
                nn.Dropout(0.1),
                nn.Dense(units=n_class,
                         in_units=head_width,
                         use_bias=False))