def __init__(self, input_size=224, n_class=1000, architecture=None, model_size='Large'):
    """Build a ShuffleNetV2+ network from a fixed per-block architecture.

    Args:
        input_size: Input resolution; must be a multiple of 32. The stem
            and the first block of each of the four stages all use
            stride 2, so the final feature map is ``input_size / 32`` wide.
        n_class: Output width of the final classifier layer.
        architecture: Sequence with one entry per block (20 entries for
            ``stage_repeats = [4, 4, 8, 4]``). 0, 1 and 2 select a
            ``Shufflenet`` block with kernel size 3, 5 and 7; 3 selects
            ``Shuffle_Xception``.
        model_size: ``'Large'``, ``'Medium'`` or ``'Small'``; selects the
            per-stage channel widths.

    Raises:
        AssertionError: If ``input_size`` is not a multiple of 32,
            ``architecture`` is None, or ``architecture`` has more entries
            than there are blocks.
        NotImplementedError: For an unknown ``model_size`` or block choice.
    """
    super(ShuffleNetV2_Plus, self).__init__()

    print('model size is ', model_size)

    assert input_size % 32 == 0
    assert architecture is not None

    self.stage_repeats = [4, 4, 8, 4]
    if model_size == 'Large':
        self.stage_out_channels = [-1, 16, 68, 168, 336, 672, 1280]
    elif model_size == 'Medium':
        self.stage_out_channels = [-1, 16, 48, 128, 256, 512, 1280]
    elif model_size == 'Small':
        self.stage_out_channels = [-1, 16, 36, 104, 208, 416, 1280]
    else:
        raise NotImplementedError

    # building first layer
    input_channel = self.stage_out_channels[1]
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
        nn.BatchNorm2d(input_channel),
        HS(),
    )

    self.features = []
    archIndex = 0
    for idxstage, numrepeat in enumerate(self.stage_repeats):
        output_channel = self.stage_out_channels[idxstage + 2]

        # The first stage uses ReLU; later stages use 'HS'.
        activation = 'HS' if idxstage >= 1 else 'ReLU'
        # NOTE(review): the enabled value is the *string* 'True', not a bool.
        # It is kept as-is because the block classes are not visible here;
        # confirm they only test truthiness before normalising it.
        useSE = 'True' if idxstage >= 2 else False

        for i in range(numrepeat):
            if i == 0:
                # First block of a stage downsamples and sees all channels.
                inp, outp, stride = input_channel, output_channel, 2
            else:
                inp, outp, stride = input_channel // 2, output_channel, 1

            blockIndex = architecture[archIndex]
            archIndex += 1
            if blockIndex == 0:
                print('Shuffle3x3')
                self.features.append(
                    Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=3,
                               stride=stride, activation=activation, useSE=useSE))
            elif blockIndex == 1:
                print('Shuffle5x5')
                self.features.append(
                    Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=5,
                               stride=stride, activation=activation, useSE=useSE))
            elif blockIndex == 2:
                print('Shuffle7x7')
                self.features.append(
                    Shufflenet(inp, outp, base_mid_channels=outp // 2, ksize=7,
                               stride=stride, activation=activation, useSE=useSE))
            elif blockIndex == 3:
                print('Xception')
                self.features.append(
                    Shuffle_Xception(inp, outp, base_mid_channels=outp // 2,
                                     stride=stride, activation=activation, useSE=useSE))
            else:
                raise NotImplementedError
            input_channel = output_channel

    # Every architecture entry must have been consumed by exactly one block.
    assert archIndex == len(architecture)
    self.features = nn.Sequential(*self.features)

    self.conv_last = nn.Sequential(
        nn.Conv2d(input_channel, 1280, 1, 1, 0, bias=False),
        nn.BatchNorm2d(1280),
        HS(),
    )
    # Pool over the whole final feature map. The window was hard-coded to 7,
    # which is only right for input_size == 224; deriving it from input_size
    # gives the same value at the default and supports other multiples of 32.
    self.globalpool = nn.AvgPool2d(int(input_size // 32))
    self.LastSE = SELayer(1280)
    self.fc = nn.Sequential(
        nn.Linear(1280, 1280, bias=False),
        HS(),
    )
    self.dropout = nn.Dropout(0.2)
    self.classifier = nn.Sequential(nn.Linear(1280, n_class, bias=False))
    self._initialize_weights()
def __init__(self, input_size=224, n_class=1000, architecture=None, channels_scales=None):
    """Assemble a ShuffleNetV2 network for one fixed architecture.

    Args:
        input_size: Input resolution; asserted to be a multiple of 32.
        n_class: Output width of the classifier.
        architecture: Per-block choice: 0/1/2 pick ``Shufflenet`` with
            kernel 3/5/7, 3 picks ``Shuffle_Xception``.
        channels_scales: Per-block multiplier applied to ``outp // 2`` to
            obtain the block's mid channel count.

    Raises:
        AssertionError: On invalid ``input_size``, missing arguments, or an
            ``architecture`` longer than the number of blocks.
        NotImplementedError: For a block choice outside 0..3.
    """
    super(ShuffleNetV2_OneShot, self).__init__()

    assert input_size % 32 == 0
    assert architecture is not None and channels_scales is not None

    self.stage_repeats = [4, 4, 8, 4]
    self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
    last_channel = self.stage_out_channels[-1]

    # Stem: 3x3 stride-2 convolution.
    in_ch = self.stage_out_channels[1]
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, in_ch, 3, 2, 1, bias=False),
        nn.BatchNorm2d(in_ch),
        nn.ReLU(inplace=True),
    )

    # (choice code, kernel size, label printed when selected)
    shuffle_variants = (
        (0, 3, 'Shuffle3x3'),
        (1, 5, 'Shuffle5x5'),
        (2, 7, 'Shuffle7x7'),
    )

    layers = []
    slot = 0
    for stage_no, repeats in enumerate(self.stage_repeats):
        out_ch = self.stage_out_channels[stage_no + 2]
        for rep in range(repeats):
            # Only the first block of a stage downsamples; later blocks
            # are given half of the incoming channels.
            first_in_stage = rep == 0
            inp = in_ch if first_in_stage else in_ch // 2
            outp = out_ch
            stride = 2 if first_in_stage else 1

            choice = architecture[slot]
            mid_channels = int((outp // 2) * channels_scales[slot])
            slot += 1

            picked = None
            for code, ksize, label in shuffle_variants:
                if choice == code:
                    picked = (ksize, label)
                    break

            if picked is not None:
                ksize, label = picked
                print(label)
                layers.append(
                    Shufflenet(inp, outp, mid_channels=mid_channels,
                               ksize=ksize, stride=stride))
            elif choice == 3:
                print('Xception')
                layers.append(
                    Shuffle_Xception(inp, outp, mid_channels=mid_channels,
                                     stride=stride))
            else:
                raise NotImplementedError
            in_ch = out_ch

    assert slot == len(architecture)
    self.features = nn.Sequential(*layers)

    self.conv_last = nn.Sequential(
        nn.Conv2d(in_ch, last_channel, 1, 1, 0, bias=False),
        nn.BatchNorm2d(last_channel),
        nn.ReLU(inplace=True),
    )
    # Window equals the final feature-map size (input downsampled by 32).
    self.globalpool = nn.AvgPool2d(int(input_size / 32))
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Sequential(
        nn.Linear(last_channel, n_class, bias=False))
    self._initialize_weights()
def __init__(self, input_size=224, n_class=1000, args=None, architecture=None, channels_scales=None):
    """Build the searchable supernet with learnable architecture logits.

    Every searchable layer holds all four candidates (``Shufflenet`` with
    kernel 3, 5 and 7, plus ``Shuffle_Xception``) in an ``nn.ModuleList``.

    Args:
        input_size: Input resolution; asserted to be a multiple of 32.
        n_class: Output width of the classifier.
        args: Options object. This constructor reads ``bn_affine``,
            ``bn_eps``, ``flops_loss``, ``loc_mean``, ``loc_std`` and
            ``early_fix_arch`` from it, so it must not be None despite the
            default.
        architecture: Only checked for being non-None; not otherwise used.
        channels_scales: Per-layer multiplier applied to ``outp // 2`` to
            obtain the mid channel count.

    Raises:
        AssertionError: If ``input_size`` is not a multiple of 32 or
            ``architecture`` / ``channels_scales`` is None.
    """
    super(ShuffleNetV2_OneShot, self).__init__()

    assert input_size % 32 == 0
    assert architecture is not None and channels_scales is not None

    self.stage_repeats = [4, 4, 8, 4]
    self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
    self.args = args
    self.bn_affine = args.bn_affine
    self.bn_eps = args.bn_eps
    self.num_blocks = 4
    # NOTE(review): CUDA is hard-coded here and in the .cuda() calls below;
    # this constructor cannot run on a CPU-only machine.
    self.device = torch.device("cuda")

    if args.flops_loss:
        # 20 rows x 4 columns, the same shape as log_alpha: one row per
        # searchable layer, one column per candidate block. Presumably the
        # per-candidate FLOPs, scaled down by 1e6 -- confirm against the
        # code that produced the table.
        self.flops = torch.Tensor([
            [13396992., 15805440., 19418112., 13146112.],
            [7325696., 8931328., 11339776., 12343296.],
            [7325696., 8931328., 11339776., 12343296.],
            [7325696., 8931328., 11339776., 12343296.],
            [26304768., 28111104., 30820608., 20296192.],
            [10599680., 11603200., 13108480., 16746240.],
            [10599680., 11603200., 13108480., 16746240.],
            [10599680., 11603200., 13108480., 16746240.],
            [30670080., 31673600., 33178880., 21199360.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [10317440., 10819200., 11571840., 15899520.],
            [30387840., 30889600., 31642240., 20634880.],
            [10176320., 10427200., 10803520., 15476160.],
            [10176320., 10427200., 10803520., 15476160.],
            [10176320., 10427200., 10803520., 15476160.],
        ]).cuda() / 1000000

    # One logit per (layer, candidate), drawn from N(loc_mean, loc_std).
    self.log_alpha = torch.nn.Parameter(
        torch.zeros(sum(self.stage_repeats), self.num_blocks)
        .normal_(self.args.loc_mean, self.args.loc_std)
        .cuda()
        .requires_grad_())

    self._arch_parameters = [self.log_alpha]
    self.weights = Variable(torch.zeros_like(self.log_alpha))
    if self.args.early_fix_arch:
        self.fix_arch_index = {}

    # building first layer
    input_channel = self.stage_out_channels[1]
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
        nn.BatchNorm2d(input_channel, eps=self.bn_eps),
        nn.ReLU(inplace=True),
    )

    self.features = nn.ModuleList()
    archIndex = 0
    for idxstage, numrepeat in enumerate(self.stage_repeats):
        output_channel = self.stage_out_channels[idxstage + 2]

        for i in range(numrepeat):
            if i == 0:
                # First layer of a stage downsamples and sees all channels.
                inp, outp, stride = input_channel, output_channel, 2
            else:
                inp, outp, stride = input_channel // 2, output_channel, 1

            base_mid_channels = outp // 2
            mid_channels = int(base_mid_channels * channels_scales[archIndex])
            archIndex += 1

            # All four candidates are instantiated for this layer.
            blocks = nn.ModuleList()
            blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=3,
                                     stride=stride, bn_affine=self.bn_affine,
                                     bn_eps=self.bn_eps))
            blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=5,
                                     stride=stride, bn_affine=self.bn_affine,
                                     bn_eps=self.bn_eps))
            blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=7,
                                     stride=stride, bn_affine=self.bn_affine,
                                     bn_eps=self.bn_eps))
            blocks.append(Shuffle_Xception(inp, outp, mid_channels=mid_channels,
                                           stride=stride, bn_affine=self.bn_affine,
                                           bn_eps=self.bn_eps))
            input_channel = output_channel
            self.features += [blocks]

    self.conv_last = nn.Sequential(
        nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
        nn.BatchNorm2d(self.stage_out_channels[-1], eps=self.bn_eps),
        nn.ReLU(inplace=True),
    )
    # The stem and the four stages each halve the resolution, so the final
    # map is input_size / 32 wide. The window was hard-coded to 7 (correct
    # only for 224); it is now derived from input_size.
    self.globalpool = nn.AvgPool2d(int(input_size // 32))
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Sequential(
        nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
    self._initialize_weights()
def __init__(self, input_size=224, n_class=1000):
    """Build the one-shot supernet holding every candidate block.

    Each searchable layer is an ``nn.ModuleList`` of four candidates, in
    this order: ``Shufflenet`` with kernel 3, 5, 7, then
    ``Shuffle_Xception``. BatchNorm layers created here use
    ``affine=False``.

    Args:
        input_size: Input resolution; asserted to be a multiple of 32.
        n_class: Output width of the classifier.

    Raises:
        AssertionError: If ``input_size`` is not a multiple of 32.
    """
    super(ShuffleNetV2_OneShot, self).__init__()

    assert input_size % 32 == 0

    self.stage_repeats = [4, 4, 8, 4]
    self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]

    # building first layer
    input_channel = self.stage_out_channels[1]
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
        nn.BatchNorm2d(input_channel, affine=False),
        nn.ReLU(inplace=True),
    )

    self.features = torch.nn.ModuleList()
    archIndex = 0
    for idxstage, numrepeat in enumerate(self.stage_repeats):
        output_channel = self.stage_out_channels[idxstage + 2]

        for i in range(numrepeat):
            if i == 0:
                # First layer of a stage downsamples and sees all channels.
                inp, outp, stride = input_channel, output_channel, 2
            else:
                inp, outp, stride = input_channel // 2, output_channel, 1

            mid_channels = int(outp // 2)
            archIndex += 1

            # The original looped over range(4) with an if/elif chain whose
            # final `else: raise` could never run; the four candidates are
            # appended directly, in the same order.
            candidates = torch.nn.ModuleList()
            self.features.append(candidates)
            candidates.append(
                Shufflenet(inp, outp, mid_channels=mid_channels, ksize=3, stride=stride))
            candidates.append(
                Shufflenet(inp, outp, mid_channels=mid_channels, ksize=5, stride=stride))
            candidates.append(
                Shufflenet(inp, outp, mid_channels=mid_channels, ksize=7, stride=stride))
            candidates.append(
                Shuffle_Xception(inp, outp, mid_channels=mid_channels, stride=stride))
            input_channel = output_channel

    # Number of searchable layers (20 for stage_repeats [4, 4, 8, 4]).
    self.archLen = archIndex

    self.conv_last = nn.Sequential(
        nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
        nn.BatchNorm2d(self.stage_out_channels[-1], affine=False),
        nn.ReLU(inplace=True),
    )
    # The stem and the four stages each halve the resolution, so the final
    # map is input_size / 32 wide. The window was hard-coded to 7 (correct
    # only for 224); it is now derived from input_size.
    self.globalpool = nn.AvgPool2d(int(input_size // 32))
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Sequential(
        nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
    self._initialize_weights()
def __init__(self, input_size=32, block=5, n_class=10):
    """Build the one-shot supernet in its 20-layer or 5-layer layout.

    Each searchable layer is an ``nn.ModuleList`` of four candidates, in
    this order: ``Shufflenet`` with kernel 3, 5, 7, then
    ``Shuffle_Xception``.

    Args:
        input_size: Input resolution; asserted to be a multiple of 32.
        block: Layout selector. ``20`` uses stage repeats [4, 4, 8, 4] with
            a stride-2 stem and stride 2 in every stage (32x total
            downsampling). ``5`` uses repeats [1, 1, 2, 1], a stride-1 stem
            and stage strides [1, 1, 2, 2].
        n_class: Output width of the classifier.

    Raises:
        AssertionError: If ``input_size`` is not a multiple of 32.
        NotImplementedError: If ``block`` is neither 20 nor 5. Previously
            this surfaced later as an AttributeError on ``stage_repeats``.
    """
    super(ShuffleNetV2_OneShot_cifar, self).__init__()

    assert input_size % 32 == 0

    if block == 20:
        self.stage_repeats = [4, 4, 8, 4]  # 20-layer layout
        self.stage_strides = [2, 2, 2, 2]  # every stage downsamples
        # 3-16, (16-64, 64-160, 160-320, 320-640), 640-1024
        self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
        first_stride = 2
    elif block == 5:
        self.stage_repeats = [1, 1, 2, 1]  # 5-layer layout
        self.stage_strides = [1, 1, 2, 2]  # only the last two stages downsample
        # 3-16, (16-16, 16-16, 16-32, 32-64), 64-128
        self.stage_out_channels = [-1, 16, 16, 16, 32, 64, 128]
        first_stride = 1
    else:
        raise NotImplementedError(
            'unsupported block count: {!r} (expected 20 or 5)'.format(block))

    # building first layer (the two layouts differ only in the stem stride)
    input_channel = self.stage_out_channels[1]
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, input_channel, 3, first_stride, 1, bias=False),
        nn.BatchNorm2d(input_channel, affine=False),
        nn.ReLU(inplace=True),
    )

    self.features = torch.nn.ModuleList()
    archIndex = 0
    for idxstage, numrepeat in enumerate(self.stage_repeats):
        output_channel = self.stage_out_channels[idxstage + 2]
        stage_stride = self.stage_strides[idxstage]

        for i in range(numrepeat):
            if i == 0 and stage_stride == 2:
                # A downsampling first layer sees all incoming channels.
                inp, outp, stride = input_channel, output_channel, 2
            else:
                # Stride-1 layers (including the first layer of a stride-1
                # stage) are given half of the incoming channels.
                inp, outp, stride = input_channel // 2, output_channel, 1

            mid_channels = int(outp // 2)
            archIndex += 1  # running count of searchable layers

            # The original looped over range(4) with an if/elif chain whose
            # final `else: raise` could never run; the four candidates are
            # appended directly, in the same order.
            candidates = torch.nn.ModuleList()
            self.features.append(candidates)
            candidates.append(
                Shufflenet(inp, outp, mid_channels=mid_channels, ksize=3, stride=stride))
            candidates.append(
                Shufflenet(inp, outp, mid_channels=mid_channels, ksize=5, stride=stride))
            candidates.append(
                Shufflenet(inp, outp, mid_channels=mid_channels, ksize=7, stride=stride))
            candidates.append(
                Shuffle_Xception(inp, outp, mid_channels=mid_channels, stride=stride))
            input_channel = output_channel

    self.archLen = archIndex

    self.conv_last = nn.Sequential(
        nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
        nn.BatchNorm2d(self.stage_out_channels[-1], affine=False),
        nn.ReLU(inplace=True),
    )
    if block == 20:
        # This layout downsamples by 32, so the final map is input_size / 32
        # wide. The window was hard-coded to 7, which cannot work with the
        # default input_size of 32 (a 1x1 map); it is now derived from
        # input_size and is still 7 for 224.
        self.globalpool = nn.AvgPool2d(int(input_size // 32))
    else:
        self.globalpool = nn.AdaptiveAvgPool2d((1, 1))
    self.dropout = nn.Dropout(0.1)
    self.classifier = nn.Sequential(
        nn.Linear(self.stage_out_channels[-1], n_class, bias=False))
    self._initialize_weights()
def __init__(self, input_size=224, n_class=1000, args=None, architecture=None, channels_scales=None, weights=None):
    """Build the network with one candidate per layer chosen by ``weights``.

    Each layer is stored as a four-slot ``nn.ModuleList`` in which only the
    selected slot holds a module and the other three hold ``None``.

    Args:
        input_size: Input resolution; asserted to be a multiple of 32.
        n_class: Accepted but not used in the visible part of this method.
        args: Options object. This constructor reads ``bn_affine``,
            ``bn_eps``, ``loc_mean`` and ``loc_std`` from it, so it must
            not be None despite the default.
        architecture: Only checked for being non-None; not otherwise used.
        channels_scales: Per-layer multiplier applied to ``outp // 2`` to
            obtain the mid channel count.
        weights: Indexed as ``weights[layer, :]``. Each row must contain
            exactly one entry equal to 1, because its position is read with
            ``.nonzero().item()``. Slots 0/1/2 select ``Shufflenet`` with
            kernel 3/5/7 and slot 3 selects ``Shuffle_Xception``.

    Raises:
        AssertionError: If ``input_size`` is not a multiple of 32 or
            ``architecture`` / ``channels_scales`` is None.
    """
    super(ShuffleNetV2_OneShot, self).__init__()

    assert input_size % 32 == 0
    assert architecture is not None and channels_scales is not None

    self.stage_repeats = [4, 4, 8, 4]
    self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
    self.args = args
    self.bn_affine = args.bn_affine
    self.bn_eps = args.bn_eps
    self.num_blocks = 4
    # NOTE(review): CUDA is hard-coded here and in .cuda() below.
    self.device = torch.device("cuda")

    # One logit per (layer, candidate), drawn from N(loc_mean, loc_std).
    self.log_alpha = torch.nn.Parameter(
        torch.zeros(sum(self.stage_repeats), self.num_blocks)
        .normal_(self.args.loc_mean, self.args.loc_std)
        .cuda()
        .requires_grad_())

    self._arch_parameters = [self.log_alpha]
    self.weights = weights

    # building first layer
    input_channel = self.stage_out_channels[1]
    self.first_conv = nn.Sequential(
        nn.Conv2d(3, input_channel, 3, 2, 1, bias=False),
        nn.BatchNorm2d(input_channel, eps=self.bn_eps),
        nn.ReLU(inplace=True),
    )

    self.features = nn.ModuleList()
    archIndex = 0
    for idxstage, numrepeat in enumerate(self.stage_repeats):
        output_channel = self.stage_out_channels[idxstage + 2]

        for i in range(numrepeat):
            if i == 0:
                # First layer of a stage downsamples and sees all channels.
                inp, outp, stride = input_channel, output_channel, 2
            else:
                inp, outp, stride = input_channel // 2, output_channel, 1

            base_mid_channels = outp // 2
            mid_channels = int(base_mid_channels * channels_scales[archIndex])
            # Index of the single entry equal to 1 in this layer's row.
            pos = (self.weights[archIndex, :] == 1).nonzero().item()
            archIndex += 1

            # Fix: bn_eps used to sit outside the block constructor's
            # parentheses, so it was passed to blocks.append() (and the
            # pos == 2 branch did not parse at all). It is now passed to the
            # block itself.
            blocks = nn.ModuleList()
            if pos == 0:
                blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=3,
                                         stride=stride, bn_affine=self.bn_affine,
                                         bn_eps=self.bn_eps))
                blocks.append(None)
                blocks.append(None)
                blocks.append(None)
            elif pos == 1:
                blocks.append(None)
                blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=5,
                                         stride=stride, bn_affine=self.bn_affine,
                                         bn_eps=self.bn_eps))
                blocks.append(None)
                blocks.append(None)
            elif pos == 2:
                blocks.append(None)
                blocks.append(None)
                blocks.append(Shufflenet(inp, outp, mid_channels=mid_channels, ksize=7,
                                         stride=stride, bn_affine=self.bn_affine,
                                         bn_eps=self.bn_eps))
                blocks.append(None)
            elif pos == 3:
                blocks.append(None)
                blocks.append(None)
                blocks.append(None)
                blocks.append(Shuffle_Xception(inp, outp, mid_channels=mid_channels,
                                               stride=stride, bn_affine=self.bn_affine,
                                               bn_eps=self.bn_eps))
            input_channel = output_channel
            self.features += [blocks]

    self.conv_last = nn.Sequential(
        nn.Conv2d(input_channel, self.stage_out_channels[-1], 1, 1, 0, bias=False),
        nn.BatchNorm2d(self.stage_out_channels[-1], eps=self.bn_eps),
        nn.ReLU(inplace=True),
    )
    # NOTE(review): the visible source stops inside the conv_last definition
    # above. Only its closing parenthesis was supplied; any layers that
    # followed (pooling, classifier, weight init) are not visible and must
    # be restored from the full file.
def __init__(self, input_size=224, n_class=1000, architecture=None, channels_idx=None, act_type='relu', search=False):
    """Assemble the Gluon ShuffleNetV2 network for one fixed architecture.

    Args:
        input_size: Input resolution; asserted to be a multiple of 32.
        n_class: Number of units in the output ``Dense`` layer.
        architecture: Per-block choice: 0/1/2 pick ``Shufflenet`` with
            kernel 3/5/7, 3 picks ``Shuffle_Xception``.
        channels_idx: Per-block index into ``candidate_scales``; the chosen
            scale multiplies ``outp // 2`` to give the mid channel count.
        act_type: Activation used after the first and last convolutions.
            The blocks themselves are always built with ``'relu'``.
        search: Stored on the instance and forwarded to every block.

    Raises:
        AssertionError: On invalid ``input_size``, missing arguments, or an
            ``architecture`` longer than the number of blocks.
        NotImplementedError: For a block choice outside 0..3.
    """
    super(ShuffleNetV2_OneShot, self).__init__()

    assert input_size % 32 == 0
    assert architecture is not None and channels_idx is not None

    self.stage_repeats = [4, 4, 8, 4]
    self.stage_out_channels = [-1, 16, 64, 160, 320, 640, 1024]
    self.candidate_scales = [0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0]
    in_ch = self.stage_out_channels[1]
    last_ch = self.stage_out_channels[-1]

    self.search = search

    # Stem: 3x3 stride-2 convolution, batch norm, activation.
    self.first_conv = nn.HybridSequential(prefix='first_')
    self.first_conv.add(
        nn.Conv2D(in_ch, in_channels=3, kernel_size=3, strides=2,
                  padding=1, use_bias=False),
        nn.BatchNorm(in_channels=in_ch, momentum=0.1),
        Activation(act_type),
    )

    # (choice code, kernel size) for the plain Shufflenet candidates.
    shuffle_kernels = ((0, 3), (1, 5), (2, 7))

    self.features = nn.HybridSequential(prefix='features_')
    slot = 0
    for stage_no, repeats in enumerate(self.stage_repeats):
        out_ch = self.stage_out_channels[stage_no + 2]
        for rep in range(repeats):
            # Only the stride differs between the first block of a stage
            # and the rest; the input channel count is passed unchanged.
            inp, outp = in_ch, out_ch
            stride = 2 if rep == 0 else 1

            choice = architecture[slot]
            scale = self.candidate_scales[channels_idx[slot]]
            mid_channels = int((outp // 2) * scale)
            slot += 1

            # The per-block container is registered before the choice is
            # validated, as in the original.
            holder = nn.HybridSequential(prefix='')
            self.features.add(holder)

            ksize = None
            for code, size in shuffle_kernels:
                if choice == code:
                    ksize = size
                    break

            if ksize is not None:
                holder.add(
                    Shufflenet(inp, outp, mid_channels=mid_channels,
                               ksize=ksize, stride=stride, act_type='relu',
                               BatchNorm=nn.BatchNorm, search=self.search))
            elif choice == 3:
                holder.add(
                    Shuffle_Xception(inp, outp, mid_channels=mid_channels,
                                     stride=stride, act_type='relu',
                                     BatchNorm=nn.BatchNorm, search=self.search))
            else:
                raise NotImplementedError
            in_ch = out_ch

    assert slot == len(architecture)

    # Head: 1x1 convolution up to the final width, then global pooling.
    self.conv_last = nn.HybridSequential(prefix='last_')
    self.conv_last.add(
        nn.Conv2D(last_ch, in_channels=in_ch, kernel_size=1, strides=1,
                  padding=0, use_bias=False),
        nn.BatchNorm(in_channels=last_ch, momentum=0.1),
        Activation(act_type),
    )

    self.globalpool = nn.GlobalAvgPool2D()
    self.output = nn.HybridSequential(prefix='output_')
    with self.output.name_scope():
        self.output.add(
            nn.Dropout(0.1),
            nn.Dense(units=n_class, in_units=last_ch, use_bias=False))