def _make_layers(self, cfg, nbits_w, nbits_a, q_mode):
    layers = []
    in_channels = 3
    # change to actq+convq by Joey.Z on May 28 2019
    for x in cfg:
        if x == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        elif in_channels == 3:
            # first layer: 8-bit signed input quantizer (disabled when
            # both nbits <= 0), followed by a quantized conv
            layers += [
                ActQv2(nbits=-1 if max(nbits_a, nbits_w) <= 0 else 8,
                       signed=True, l2=self.l2),
                Conv2dQv2(in_channels, x, kernel_size=3, padding=1, bias=False,
                          nbits=nbits_w, mode=q_mode, l2=self.l2),
                nn.BatchNorm2d(x),
                nn.ReLU(inplace=True),
            ]
            in_channels = x
        else:
            # remaining conv layers (the last layer is handled identically):
            # quantized activations + quantized conv
            layers += [
                ActQv2(nbits=nbits_a, l2=self.l2),
                Conv2dQv2(in_channels, x, kernel_size=3, padding=1, bias=False,
                          nbits=nbits_w, mode=q_mode, l2=self.l2),
                nn.BatchNorm2d(x),
                nn.ReLU(inplace=True),
            ]
            in_channels = x
    return nn.Sequential(*layers)
def _make_layers(self, cfg, nbits_w, nbits_a, q_mode):
    layers = []
    in_channels = 3
    for x in cfg:
        if x == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        elif in_channels == 3:  # do not quantize the first layer
            layers += [
                nn.Conv2d(in_channels, x, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm2d(x),
                nn.ReLU(inplace=True),
                ActQv2(nbits=nbits_a, l2=self.l2),
            ]
            in_channels = x
        else:
            layers += [
                Conv2dQv2(in_channels, x, kernel_size=3, padding=1, bias=False,
                          nbits=nbits_w, mode=q_mode, l2=self.l2),
                nn.BatchNorm2d(x),
                nn.ReLU(inplace=True),
                ActQv2(nbits=nbits_a, l2=self.l2),
            ]
            in_channels = x
    return nn.Sequential(*layers)
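# A minimal usage sketch for the two builders above. The cfg value is
# hypothetical (the real networks supply their own): integers in cfg are
# conv output widths and 'M' marks a 2x2 max-pool.
def _demo_vgg_features(model):
    # `model` is an instance of the VGG-style class owning _make_layers
    cfg = [128, 128, 'M', 256, 256, 'M', 512, 512, 'M']  # illustrative only
    return model._make_layers(cfg, nbits_w=4, nbits_a=4,
                              q_mode=Qmodes.kernel_wise)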
def __init__(self, block, layers, num_classes=1000, nbits_w=4, nbits_a=4,
             q_mode=Qmodes.kernel_wise, l2=True, **kwargs):
    self.inplanes = 64
    super(ResNetQFNv2, self).__init__()
    self.nbits_w = nbits_w
    self.nbits_a = nbits_a
    self.q_mode = q_mode
    self.l2 = l2
    # FN variant: the first layer is quantized too, behind an 8-bit signed
    # input quantizer (disabled when both nbits <= 0)
    self.conv1 = nn.Sequential(
        ActQv2(nbits=-1 if max(nbits_a, nbits_w) <= 0 else 8, signed=True),
        Conv2dQv2(3, 64, kernel_size=7, stride=2, padding=3, bias=False,
                  nbits=nbits_w, mode=q_mode),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True))  # no trailing ActQ, as in LQ-Net
    self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
    # LQ-Net-style variant (ActQ deleted, full-precision fc):
    # self.avgpool = nn.Sequential(nn.AvgPool2d(7, stride=1))
    # self.fc = nn.Linear(512 * block.expansion, num_classes)
    self.avgpool = nn.Sequential(nn.AvgPool2d(7, stride=1),
                                 ActQv2(nbits=nbits_a))
    self.fc = LinearQv2(512 * block.expansion, num_classes, nbits=nbits_w)
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
def conv_bn_qv2(inp, oup, stride, nbits_w=4, nbits_a=4, q_mode=Qmodes.kernel_wise):
    return nn.Sequential(
        Conv2dQv2(inp, oup, 3, stride, 1, bias=False, nbits=nbits_w, mode=q_mode),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),
        ActQv2(nbits=nbits_a))
def _make_layer(self, block, planes, blocks, stride=1):
    downsample = None
    if stride != 1 or self.inplanes != planes * block.expansion:
        downsample = nn.Sequential(
            Conv2dQv2(self.inplanes, planes * block.expansion, kernel_size=1,
                      stride=stride, bias=False, nbits=self.nbits_w,
                      mode=self.q_mode, l2=self.l2),
            nn.BatchNorm2d(planes * block.expansion),
            ActQv2(nbits=self.nbits_a, signed=True, l2=self.l2),  # differs from the pre-trained model
        )
    layers = []
    layers.append(block(self.inplanes, planes, stride, downsample,
                        nbits_w=self.nbits_w, nbits_a=self.nbits_a,
                        q_mode=self.q_mode, l2=self.l2))
    self.inplanes = planes * block.expansion
    for _ in range(1, blocks):
        layers.append(block(self.inplanes, planes, nbits_w=self.nbits_w,
                            nbits_a=self.nbits_a, q_mode=self.q_mode,
                            l2=self.l2))
    return nn.Sequential(*layers)
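# Usage sketch for the quantized ResNet above. The block class is defined
# elsewhere in this repo; `block_cls` is a stand-in for it here, and
# [2, 2, 2, 2] is the standard ResNet-18 stage layout.
def _demo_resnet18_qfnv2(block_cls, **kwargs):
    return ResNetQFNv2(block_cls, [2, 2, 2, 2], num_classes=1000,
                       nbits_w=4, nbits_a=4, **kwargs)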
def __init__(self, inp, oup, stride, expand_ratio, nbits_w=4, nbits_a=4,
             q_mode=Qmodes.kernel_wise):
    super(InvertedResidualQv2, self).__init__()
    self.stride = stride
    assert stride in [1, 2]

    hidden_dim = round(inp * expand_ratio)
    self.use_res_connect = self.stride == 1 and inp == oup
    # signed quantizer for the block output
    self.out_actq = ActQv2(nbits_a, signed=True)

    if expand_ratio == 1:
        self.conv = nn.Sequential(
            # dw
            Conv2dQv2(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim,
                      bias=False, nbits=nbits_w, mode=q_mode),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            ActQv2(nbits=nbits_a),
            # pw-linear
            Conv2dQv2(hidden_dim, oup, 1, 1, 0, bias=False, nbits=nbits_w,
                      mode=q_mode),
            nn.BatchNorm2d(oup),
            ActQv2(nbits=nbits_a, signed=True),
        )
    else:
        self.conv = nn.Sequential(
            # pw
            Conv2dQv2(inp, hidden_dim, 1, 1, 0, bias=False, nbits=nbits_w,
                      mode=q_mode),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            ActQv2(nbits=nbits_a),
            # dw
            Conv2dQv2(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim,
                      bias=False, nbits=nbits_w, mode=q_mode),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            ActQv2(nbits=nbits_a),
            # pw-linear
            Conv2dQv2(hidden_dim, oup, 1, 1, 0, bias=False, nbits=nbits_w,
                      mode=q_mode),
            nn.BatchNorm2d(oup),
            ActQv2(nbits=nbits_a, signed=True),
        )
def conv_1x1_bn_qv2(inp, oup, nbits_w=4, nbits_a=4, q_mode=Qmodes.kernel_wise):
    return nn.Sequential(
        Conv2dQv2(inp, oup, 1, 1, 0, bias=False, nbits=nbits_w, mode=q_mode),
        nn.BatchNorm2d(oup),
        nn.ReLU6(inplace=True),  # no last actq
        ActQv2(nbits=nbits_a))
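# Composition sketch: how the three quantized MobileNetV2 building blocks
# above chain together. Channel sizes mirror the standard MobileNetV2
# stem/first-bottleneck/head but are illustrative here.
def _demo_mobilenetv2_blocks():
    stem = conv_bn_qv2(3, 32, stride=2)                          # 3x3 stem
    bottleneck = InvertedResidualQv2(32, 16, 1, expand_ratio=1)  # first block
    head = conv_1x1_bn_qv2(320, 1280)                            # 1x1 head
    return stem, bottleneck, head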
def __init__(self, num_classes=1000, nbits_w=4, nbits_a=4,
             q_mode=Qmodes.kernel_wise, l2=True):
    super(AlexNetQFNv2, self).__init__()
    self.features = nn.Sequential(
        ActQv2(nbits=-1 if max(nbits_a, nbits_w) <= 0 else 8, signed=True, l2=l2),
        Conv2dQv2(3, 64, kernel_size=11, stride=4, padding=2,
                  nbits=nbits_w, mode=q_mode, l2=l2),  # conv1
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQv2(nbits=nbits_a, l2=l2),
        Conv2dQv2(64, 192, kernel_size=5, padding=2,
                  nbits=nbits_w, mode=q_mode, l2=l2),  # conv2
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQv2(nbits=nbits_a, l2=l2),
        Conv2dQv2(192, 384, kernel_size=3, padding=1,
                  nbits=nbits_w, mode=q_mode, l2=l2),  # conv3
        nn.ReLU(inplace=True),
        ActQv2(nbits=nbits_a, l2=l2),
        Conv2dQv2(384, 256, kernel_size=3, padding=1,
                  nbits=nbits_w, mode=q_mode, l2=l2),  # conv4
        nn.ReLU(inplace=True),
        ActQv2(nbits=nbits_a, l2=l2),
        Conv2dQv2(256, 256, kernel_size=3, padding=1,
                  nbits=nbits_w, mode=q_mode, l2=l2),  # conv5
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
    )
    self.classifier = nn.Sequential(
        ActQv2(nbits=nbits_a, l2=l2),
        DropoutScale(),
        LinearQv2(256 * 6 * 6, 4096, nbits=nbits_w, mode=Qmodes.layer_wise, l2=l2),  # fc6
        nn.ReLU(inplace=True),
        ActQv2(nbits=nbits_a, l2=l2),
        DropoutScale(),
        LinearQv2(4096, 4096, nbits=nbits_w, mode=Qmodes.layer_wise, l2=l2),  # fc7
        nn.ReLU(inplace=True),
        ActQv2(nbits=nbits_a, l2=l2),  # key layer
        LinearQv2(4096, num_classes, nbits=nbits_w, mode=Qmodes.layer_wise, l2=l2),  # fc8
    )
def __init__(self, num_classes=1000, nbits_w=4, nbits_a=4,
             q_mode=Qmodes.kernel_wise, l2=True):
    super(AlexNetQv2, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),  # conv1 (not quantized)
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQv2(nbits=nbits_a, l2=l2),
        Conv2dQv2(64, 192, kernel_size=5, padding=2,
                  nbits=nbits_w, mode=q_mode, l2=l2),  # conv2
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQv2(nbits=nbits_a, l2=l2),
        Conv2dQv2(192, 384, kernel_size=3, padding=1,
                  nbits=nbits_w, mode=q_mode, l2=l2),  # conv3
        nn.ReLU(inplace=True),
        ActQv2(nbits=nbits_a, l2=l2),
        Conv2dQv2(384, 256, kernel_size=3, padding=1,
                  nbits=nbits_w, mode=q_mode, l2=l2),  # conv4
        nn.ReLU(inplace=True),
        ActQv2(nbits=nbits_a, l2=l2),
        Conv2dQv2(256, 256, kernel_size=3, padding=1,
                  nbits=nbits_w, mode=q_mode, l2=l2),  # conv5
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQv2(nbits=nbits_a, l2=l2),
    )
    self.classifier = nn.Sequential(
        DropoutScale(),
        # Experiments showed no accuracy difference between layer-wise and
        # kernel-wise quantization here, so the FC layers use layer-wise.
        LinearQv2(256 * 6 * 6, 4096, nbits=nbits_w, mode=Qmodes.layer_wise, l2=l2),  # fc6
        nn.ReLU(inplace=True),
        ActQv2(nbits=nbits_a, l2=l2),
        DropoutScale(),
        LinearQv2(4096, 4096, nbits=nbits_w, mode=Qmodes.layer_wise, l2=l2),  # fc7
        nn.ReLU(inplace=True),
        nn.Linear(4096, num_classes),  # fc8 (not quantized)
    )
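# The two AlexNet variants above differ only at the network boundaries:
# AlexNetQFNv2 quantizes every layer (an 8-bit signed ActQv2 ahead of conv1
# and a quantized fc8), while AlexNetQv2 keeps conv1 and fc8 in full
# precision. Minimal instantiation sketch:
def _demo_alexnet_variants(num_classes=1000):
    fully_quantized = AlexNetQFNv2(num_classes=num_classes, nbits_w=4, nbits_a=4)
    first_last_fp = AlexNetQv2(num_classes=num_classes, nbits_w=4, nbits_a=4)
    return fully_quantized, first_last_fp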
def convqv2_3x3(in_planes, out_planes, stride=1, nbits_w=4,
                q_mode=Qmodes.kernel_wise, l2=True):
    """3x3 quantized convolution with padding."""
    return Conv2dQv2(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False, nbits=nbits_w, mode=q_mode, l2=l2)
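# Usage sketch for the helper above (channel sizes illustrative): a quantized
# 3x3 conv as it would appear inside a residual block, e.g.
#   conv2 = convqv2_3x3(64, 64, stride=1, nbits_w=4)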