Example #1
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F

def set_running_statistics(model, data_loader, distributed=False):
    """Recompute BN running statistics by forwarding calibration data through
    a copy of the model. AverageMeter, DistributedTensor, DynamicBatchNorm2d,
    and get_net_device are helpers from the same codebase."""
    bn_mean = {}
    bn_var = {}

    # patch BN forwards on a deep copy so the original model's modules
    # keep their normal forward methods
    forward_model = copy.deepcopy(model)
    for name, m in forward_model.named_modules():
        if isinstance(m, nn.BatchNorm2d):
            if distributed:
                bn_mean[name] = DistributedTensor(name + '#mean')
                bn_var[name] = DistributedTensor(name + '#var')
            else:
                bn_mean[name] = AverageMeter()
                bn_var[name] = AverageMeter()

            def new_forward(bn, mean_est, var_est):
                def lambda_forward(x):
                    batch_mean = x.mean(0, keepdim=True).mean(
                        2, keepdim=True).mean(3, keepdim=True)  # 1, C, 1, 1
                    batch_var = (x - batch_mean) * (x - batch_mean)
                    batch_var = batch_var.mean(0, keepdim=True).mean(
                        2, keepdim=True).mean(3, keepdim=True)

                    batch_mean = torch.squeeze(batch_mean)
                    batch_var = torch.squeeze(batch_var)

                    mean_est.update(batch_mean.data, x.size(0))
                    var_est.update(batch_var.data, x.size(0))

                    # bn forward using calculated mean & var
                    _feature_dim = batch_mean.size(0)
                    return F.batch_norm(
                        x,
                        batch_mean,
                        batch_var,
                        bn.weight[:_feature_dim],
                        bn.bias[:_feature_dim],
                        False,
                        0.0,
                        bn.eps,
                    )

                return lambda_forward

            m.forward = new_forward(m, bn_mean[name], bn_var[name])

    with torch.no_grad():
        DynamicBatchNorm2d.SET_RUNNING_STATISTICS = True
        for images, labels in data_loader:
            images = images.to(get_net_device(forward_model))
            forward_model(images)  # patched BN forwards accumulate batch statistics
        DynamicBatchNorm2d.SET_RUNNING_STATISTICS = False

    # write the averaged statistics back into the original model's BN buffers
    for name, m in model.named_modules():
        if name in bn_mean and bn_mean[name].count > 0:
            feature_dim = bn_mean[name].avg.size(0)
            assert isinstance(m, nn.BatchNorm2d)
            m.running_mean.data[:feature_dim].copy_(bn_mean[name].avg)
            m.running_var.data[:feature_dim].copy_(bn_var[name].avg)
    return model
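
A quick usage sketch: recalibrating BN statistics after sampling one sub-network. Everything here is hypothetical scaffolding (`supernet`, `calib_loader`, and `set_active_subnet` stand in for the surrounding OFA-style codebase); only `set_running_statistics` is the function above.

# Hypothetical usage; supernet, calib_loader, and set_active_subnet are
# stand-ins for the surrounding codebase, not part of Example #1.
supernet.set_active_subnet(ks=5, e=4, d=3)      # sample one architecture
set_running_statistics(supernet, calib_loader)  # recompute BN buffers
# supernet's BatchNorm2d running_mean/running_var now match the sampled
# sub-network, so it can be evaluated without further BN calibration.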
Example #2
    def get_active_subnet(self, in_features, preserve_weight=True):
        sub_layer = LinearLayer(
            in_features, self.out_features, self.bias,
            dropout_rate=self.dropout_rate,
        )
        sub_layer = sub_layer.to(get_net_device(self))
        if not preserve_weight:
            return sub_layer

        # copy the leading [out_features, in_features] slice of the full weight
        sub_layer.linear.weight.data.copy_(
            self.linear.linear.weight.data[:self.out_features, :in_features])
        if self.bias:
            sub_layer.linear.bias.data.copy_(
                self.linear.linear.bias.data[:self.out_features])
        return sub_layer
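
A hedged usage sketch, assuming the method belongs to a dynamic linear layer from the same codebase (the class name and constructor below are illustrative, not verbatim):

# Illustrative only: DynamicLinearLayer and its constructor are assumptions.
dyn_fc = DynamicLinearLayer(in_features_list=[256, 512], out_features=1000)
static_fc = dyn_fc.get_active_subnet(in_features=256)
# static_fc is a plain LinearLayer whose weight is the leading
# [1000, 256] slice of the dynamic layer's full weight matrix.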
Example #3
    def get_active_subnet(self, in_channel, preserve_weight=True):
        middle_channel = make_divisible(
            round(in_channel * self.active_expand_ratio), 8)

        # build the new layer
        sub_layer = MBInvertedConvLayer(
            in_channel,
            self.active_out_channel,
            self.active_kernel_size,
            self.stride,
            self.active_expand_ratio,
            act_func=self.act_func,
            mid_channels=middle_channel,
            use_se=self.use_se,
        )
        sub_layer = sub_layer.to(get_net_device(self))

        if not preserve_weight:
            return sub_layer

        # copy weight from current layer
        if sub_layer.inverted_bottleneck is not None:
            sub_layer.inverted_bottleneck.conv.weight.data.copy_(
                self.inverted_bottleneck.conv.conv.weight.data[
                    :middle_channel, :in_channel, :, :])
            copy_bn(sub_layer.inverted_bottleneck.bn,
                    self.inverted_bottleneck.bn.bn)

        sub_layer.depth_conv.conv.weight.data.copy_(
            self.depth_conv.conv.get_active_filter(
                middle_channel, self.active_kernel_size).data)
        copy_bn(sub_layer.depth_conv.bn, self.depth_conv.bn.bn)

        if self.use_se:
            # squeeze-and-excite FCs are sliced to the active widths as well
            se_mid = make_divisible(middle_channel // SEModule.REDUCTION,
                                    divisor=8)
            sub_layer.depth_conv.se.fc.reduce.weight.data.copy_(
                self.depth_conv.se.fc.reduce.weight.data[
                    :se_mid, :middle_channel, :, :])
            sub_layer.depth_conv.se.fc.reduce.bias.data.copy_(
                self.depth_conv.se.fc.reduce.bias.data[:se_mid])

            sub_layer.depth_conv.se.fc.expand.weight.data.copy_(
                self.depth_conv.se.fc.expand.weight.data[
                    :middle_channel, :se_mid, :, :])
            sub_layer.depth_conv.se.fc.expand.bias.data.copy_(
                self.depth_conv.se.fc.expand.bias.data[:middle_channel])

        sub_layer.point_linear.conv.weight.data.copy_(
            self.point_linear.conv.conv.weight.data[
                :self.active_out_channel, :middle_channel, :, :])
        copy_bn(sub_layer.point_linear.bn, self.point_linear.bn.bn)

        return sub_layer
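
`copy_bn` is defined elsewhere in the codebase. A plausible minimal implementation, consistent with the leading-slice convention used throughout these examples (a sketch under that assumption, not the repo's exact code):

def copy_bn(target_bn, src_bn):
    # Copy affine parameters and running statistics from the (larger)
    # source BN into the (smaller or equal) target BN, leading slice first.
    feature_dim = target_bn.num_features
    target_bn.weight.data.copy_(src_bn.weight.data[:feature_dim])
    target_bn.bias.data.copy_(src_bn.bias.data[:feature_dim])
    target_bn.running_mean.data.copy_(src_bn.running_mean.data[:feature_dim])
    target_bn.running_var.data.copy_(src_bn.running_var.data[:feature_dim])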
Example #4
    def get_active_subnet(self, in_channel, preserve_weight=True):
        sub_layer = ConvLayer(
            in_channel, self.active_out_channel, self.kernel_size,
            self.stride, self.dilation,
            use_bn=self.use_bn, act_func=self.act_func,
        )
        sub_layer = sub_layer.to(get_net_device(self))

        if not preserve_weight:
            return sub_layer

        sub_layer.conv.weight.data.copy_(
            self.conv.conv.weight.data[
                :self.active_out_channel, :in_channel, :, :])
        if self.use_bn:
            copy_bn(sub_layer.bn, self.bn.bn)

        return sub_layer
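
All of these `get_active_subnet` methods rely on the same idea: a sub-network's kernel is the leading slice of the supernet's full kernel. A self-contained demonstration in plain PyTorch (no codebase classes involved):

import torch

full = torch.randn(64, 32, 3, 3)   # full conv kernel: 64 out, 32 in channels
active_out, active_in = 40, 24     # sampled sub-network widths
sub = full[:active_out, :active_in, :, :].clone()
# sub parameterizes a 24->40 conv that shares its weights with the supernet
# and with every other sub-network containing this leading slice.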
Example #5
    def get_active_subnet(self, in_channel, preserve_weight=True):
        middle_channel = make_divisible(
            round(in_channel * self.active_expand_ratio), 8)

        # build the new layer
        sub_layer = MBInvertedQConvLayer(
            in_channel,
            self.active_out_channel,
            self.active_kernel_size,
            self.stride,
            self.active_expand_ratio,
            act_func=self.act_func,
            mid_channels=middle_channel,
            pw_w_bit=self.point_linear.conv.w_bit,
            pw_a_bit=self.point_linear.conv.a_bit,
            dw_w_bit=self.depth_conv.conv.w_bit,
            dw_a_bit=self.depth_conv.conv.a_bit,
        )
        sub_layer = sub_layer.to(get_net_device(self))

        if not preserve_weight:
            return sub_layer

        # copy weight from current layer
        if sub_layer.inverted_bottleneck is not None:
            sub_layer.inverted_bottleneck.conv.weight.data.copy_(
                self.inverted_bottleneck.conv.conv.weight.data[
                    :middle_channel, :in_channel, :, :])
            copy_bn(sub_layer.inverted_bottleneck.bn,
                    self.inverted_bottleneck.bn.bn)

        sub_layer.depth_conv.conv.weight.data.copy_(
            self.depth_conv.conv.get_active_filter(
                middle_channel, self.active_kernel_size).data)
        copy_bn(sub_layer.depth_conv.bn, self.depth_conv.bn.bn)

        sub_layer.point_linear.conv.weight.data.copy_(
            self.point_linear.conv.conv.weight.data[
                :self.active_out_channel, :middle_channel, :, :])
        copy_bn(sub_layer.point_linear.bn, self.point_linear.bn.bn)

        return sub_layer
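
Example #5 differs from Example #3 only in what it forwards to the constructor: the quantized sub-layer inherits the parent's pointwise/depthwise bit-widths instead of an SE option. A hedged usage sketch (`block` stands in for the dynamic MB block this method belongs to; the `active_*` attributes are the ones read in the code above, and setting them externally is an assumption about the codebase):

# Illustrative only: block and its configuration API are assumptions.
block.active_kernel_size = 5
block.active_expand_ratio = 4
block.active_out_channel = 96
static_block = block.get_active_subnet(in_channel=64)
# static_block is a fixed MBInvertedQConvLayer carrying both the sliced
# weights and the parent's w_bit/a_bit quantization settings.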