import copy

import torch
import torch.nn as nn
import torch.nn.functional as F

# AverageMeter, DistributedTensor, DynamicBatchNorm2d, and get_net_device are
# helpers defined elsewhere in this codebase.


def set_running_statistics(model, data_loader, distributed=False):
    """Recompute the BatchNorm running statistics of `model` by forwarding
    calibration batches through a copy whose BN layers record per-batch
    mean/var, then writing the averaged statistics back into `model`."""
    bn_mean = {}
    bn_var = {}

    forward_model = copy.deepcopy(model)
    for name, m in forward_model.named_modules():
        if isinstance(m, nn.BatchNorm2d):
            if distributed:
                bn_mean[name] = DistributedTensor(name + '#mean')
                bn_var[name] = DistributedTensor(name + '#var')
            else:
                bn_mean[name] = AverageMeter()
                bn_var[name] = AverageMeter()

            def new_forward(bn, mean_est, var_est):
                def lambda_forward(x):
                    batch_mean = x.mean(0, keepdim=True).mean(
                        2, keepdim=True).mean(3, keepdim=True)  # 1, C, 1, 1
                    batch_var = (x - batch_mean) * (x - batch_mean)
                    batch_var = batch_var.mean(0, keepdim=True).mean(
                        2, keepdim=True).mean(3, keepdim=True)

                    batch_mean = torch.squeeze(batch_mean)
                    batch_var = torch.squeeze(batch_var)

                    mean_est.update(batch_mean.data, x.size(0))
                    var_est.update(batch_var.data, x.size(0))

                    # bn forward using calculated mean & var
                    _feature_dim = batch_mean.size(0)
                    return F.batch_norm(
                        x, batch_mean, batch_var,
                        bn.weight[:_feature_dim], bn.bias[:_feature_dim],
                        False, 0.0, bn.eps,
                    )
                return lambda_forward

            m.forward = new_forward(m, bn_mean[name], bn_var[name])

    with torch.no_grad():
        DynamicBatchNorm2d.SET_RUNNING_STATISTICS = True
        for images in data_loader:
            images = images['image'].to(get_net_device(forward_model))
            forward_model(images)
        DynamicBatchNorm2d.SET_RUNNING_STATISTICS = False

    # write the averaged statistics back into the original model
    for name, m in model.named_modules():
        if name in bn_mean and bn_mean[name].count > 0:
            feature_dim = bn_mean[name].avg.size(0)
            assert isinstance(m, nn.BatchNorm2d)
            m.running_mean.data[:feature_dim].copy_(bn_mean[name].avg)
            m.running_var.data[:feature_dim].copy_(bn_var[name].avg)
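# --- Usage sketch (illustrative, not part of this codebase) ------------------
# Recalibrate BN running statistics of a sampled subnet before evaluating it.
# `ofa_network`, `calib_loader`, and `evaluate` are hypothetical names; the
# loader is assumed to yield {'image': tensor} batches, matching the loop in
# set_running_statistics above.
def recalibrate_and_eval(ofa_network, calib_loader, evaluate):
    # assumes the surrounding supernet exposes a network-level get_active_subnet
    subnet = ofa_network.get_active_subnet(preserve_weight=True)
    set_running_statistics(subnet, calib_loader)  # overwrite stale BN mean/var
    return evaluate(subnet)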
def get_active_subnet(self, in_channel, preserve_weight=True):
    middle_channel = make_divisible(
        round(in_channel * self.active_expand_ratio), 8)

    # build the new layer
    sub_layer = MBInvertedConvLayer(
        in_channel, self.active_out_channel, self.active_kernel_size,
        self.stride, self.active_expand_ratio,
        act_func=self.act_func, mid_channels=middle_channel, use_se=self.use_se,
    )
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    # copy weight from current layer: the static sub-layer takes the leading
    # slices of the shared elastic weights
    if sub_layer.inverted_bottleneck is not None:
        sub_layer.inverted_bottleneck.conv.weight.data.copy_(
            self.inverted_bottleneck.conv.conv.weight.
            data[:middle_channel, :in_channel, :, :])
        copy_bn(sub_layer.inverted_bottleneck.bn,
                self.inverted_bottleneck.bn.bn)

    sub_layer.depth_conv.conv.weight.data.copy_(
        self.depth_conv.conv.get_active_filter(
            middle_channel, self.active_kernel_size).data)
    copy_bn(sub_layer.depth_conv.bn, self.depth_conv.bn.bn)

    if self.use_se:
        # SE weights: slice reduce/expand to the active middle channels
        se_mid = make_divisible(middle_channel // SEModule.REDUCTION,
                                divisor=8)
        sub_layer.depth_conv.se.fc.reduce.weight.data.copy_(
            self.depth_conv.se.fc.reduce.weight.
            data[:se_mid, :middle_channel, :, :])
        sub_layer.depth_conv.se.fc.reduce.bias.data.copy_(
            self.depth_conv.se.fc.reduce.bias.data[:se_mid])

        sub_layer.depth_conv.se.fc.expand.weight.data.copy_(
            self.depth_conv.se.fc.expand.weight.
            data[:middle_channel, :se_mid, :, :])
        sub_layer.depth_conv.se.fc.expand.bias.data.copy_(
            self.depth_conv.se.fc.expand.bias.data[:middle_channel])

    sub_layer.point_linear.conv.weight.data.copy_(
        self.point_linear.conv.conv.weight.
        data[:self.active_out_channel, :middle_channel, :, :])
    copy_bn(sub_layer.point_linear.bn, self.point_linear.bn.bn)

    return sub_layer
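# --- Usage sketch (illustrative) ---------------------------------------------
# Configure a dynamic MBConv block and extract a static copy. The attribute
# names mirror those read by get_active_subnet above; `dyn_block` and the
# chosen sizes are hypothetical.
def extract_static_block(dyn_block, in_channel=24):
    dyn_block.active_kernel_size = 5
    dyn_block.active_expand_ratio = 4
    dyn_block.active_out_channel = 32
    # the static layer receives the leading slices of the elastic weights
    return dyn_block.get_active_subnet(in_channel, preserve_weight=True)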
def get_active_subnet(self, in_features, preserve_weight=True):
    sub_layer = LinearLayer(in_features, self.out_features, self.bias,
                            dropout_rate=self.dropout_rate)
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    sub_layer.linear.weight.data.copy_(
        self.linear.linear.weight.data[:self.out_features, :in_features])
    if self.bias:
        sub_layer.linear.bias.data.copy_(
            self.linear.linear.bias.data[:self.out_features])
    return sub_layer
def get_active_subnet(self, in_channel, preserve_weight=True):
    sub_layer = ConvLayer(in_channel, self.active_out_channel,
                          self.kernel_size, self.stride, self.dilation,
                          use_bn=self.use_bn, act_func=self.act_func)
    sub_layer = sub_layer.to(get_net_device(self))
    if not preserve_weight:
        return sub_layer

    sub_layer.conv.weight.data.copy_(
        self.conv.conv.weight.data[:self.active_out_channel, :in_channel, :, :])
    if self.use_bn:
        copy_bn(sub_layer.bn, self.bn.bn)
    return sub_layer
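# --- Sanity-check sketch (illustrative) ---------------------------------------
# Because get_active_subnet slices the leading channels of the shared weights
# and copy_bn copies the matching slice of the BN statistics, the extracted
# static layer should reproduce the dynamic layer's output for its current
# active configuration. `dyn_layer` is a hypothetical dynamic ConvLayer, and
# this check assumes its forward honors the active channel settings.
@torch.no_grad()
def check_extraction(dyn_layer, in_channel=16):
    x = torch.randn(2, in_channel, 32, 32, device=get_net_device(dyn_layer))
    sub = dyn_layer.get_active_subnet(in_channel, preserve_weight=True)
    dyn_layer.eval(); sub.eval()  # use BN running stats, not batch stats
    assert torch.allclose(dyn_layer(x), sub(x), atol=1e-5)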