def __init__(self, vgg_name, nbits_w=4, nbits_a=4, q_mode=Qmodes.kernel_wise, l2=True):
    """Assemble the feature stack, an extra conv stage and the expanded classifier.

    Args:
        vgg_name: Key used to look up the layer configuration in the
            externally defined ``cfg`` table. The value ``'VGG7Q'`` widens
            the classifier input by a factor of 16.
        nbits_w: Value passed as ``nbits`` to ``Conv2dQ`` and ``LinearQ``.
        nbits_a: Forwarded to ``_make_layers``; together with ``nbits_w`` it
            also selects the ``nbits`` of the final ``ActQ``.
        q_mode: Value passed as ``mode`` to ``Conv2dQ`` and forwarded to
            ``_make_layers``.
        l2: Stored on the instance and passed as ``l2`` to the quantized
            layers created here.
    """
    super(VGGQFI, self).__init__()
    self.l2 = l2
    self.features = self._make_layers(
        cfg[vgg_name], nbits_w=nbits_w, nbits_a=nbits_a, q_mode=q_mode)

    # Extra 512 -> 512 conv stage appended after the configured features.
    tail_stage = [
        Conv2dQ(512, 512, kernel_size=3, padding=1, bias=False,
                nbits=nbits_w, mode=q_mode, l2=self.l2),
        nn.BatchNorm2d(512),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2),
    ]
    self.last_features = nn.Sequential(*tail_stage)

    scale = 16 if vgg_name == 'VGG7Q' else 1

    # nbits is -1 when neither bit width is positive, otherwise 8
    # (presumably -1 switches quantization off -- confirm against ActQ).
    expand_act_bits = -1 if max(nbits_a, nbits_w) <= 0 else 8
    # The linear layer takes twice the flattened width; presumably because
    # ActQ(expand=True) doubles the feature dimension -- TODO confirm.
    head = [
        ActQ(nbits=expand_act_bits, expand=True, l2=self.l2),
        LinearQ(512 * scale * 2, 10, nbits=nbits_w, l2=self.l2),
    ]
    self.expand_classifier = nn.Sequential(*head)
def __init__(self, features, num_classes=1000, init_weights=True):
    """Wrap a feature extractor with a three-layer classifier head.

    Args:
        features: Object stored unchanged as ``self.features``.
        num_classes: Output width of the final ``nn.Linear`` layer.
        init_weights: When true, ``self._initialize_weights()`` is called
            once the layers have been created.
    """
    super(VGGQ, self).__init__()
    self.features = features

    # NOTE: the nn.Dropout() layers of this head are disabled (they were
    # commented out in the original layer list).
    head = [
        LinearQ(512 * 7 * 7, 4096),
        nn.ReLU(True),
        ActQ(),
        LinearQ(4096, 4096),
        nn.ReLU(True),
        ActQ(),
        nn.Linear(4096, num_classes),
    ]
    self.classifier = nn.Sequential(*head)

    if not init_weights:
        return
    self._initialize_weights()
def __init__(self, block, layers, num_classes=1000, nbits_w=4, nbits_a=4, q_mode=Qmodes.kernel_wise):
    """Build the stem, four residual stages, pooling and the final linear layer.

    Args:
        block: Residual block class handed to ``_make_layer``; its
            ``expansion`` attribute scales the input width of ``fc``.
        layers: Indexable with four entries, one per stage, passed to
            ``_make_layer`` as its third argument.
        num_classes: Output width of the final ``LinearQ``.
        nbits_w: Stored on the instance and passed as ``nbits`` to the
            ``Conv2dQ`` stem and the ``LinearQ`` head.
        nbits_a: Stored on the instance and passed as ``nbits`` to the
            ``ActQ`` modules that follow the two pooling layers.
        q_mode: Stored on the instance and passed as ``mode`` to the stem
            ``Conv2dQ``.
    """
    # Set before the base-class constructor runs, as in the original.
    self.inplanes = 64
    super(ResNetQFN, self).__init__()
    self.nbits_w = nbits_w
    self.nbits_a = nbits_a
    self.q_mode = q_mode

    # nbits is -1 when neither bit width is positive, otherwise 8.
    stem_act_bits = -1 if max(nbits_a, nbits_w) <= 0 else 8
    stem = [
        ActQ(nbits=stem_act_bits, signed=True),
        Conv2dQ(3, 64, kernel_size=7, stride=2, padding=3, bias=False,
                nbits=nbits_w, mode=q_mode),
        nn.BatchNorm2d(64),
        nn.ReLU(inplace=True),
    ]
    # Original author's note on this layer: "del ActQ as LQ-Net".
    self.conv1 = nn.Sequential(*stem)

    self.maxpool = nn.Sequential(
        nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
        ActQ(nbits=nbits_a),
    )

    # Stages are created in order; every stage after the first uses stride 2.
    self.layer1 = self._make_layer(block, 64, layers[0])
    self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
    self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
    self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

    # Original author's note on this layer: "del ActQ as LQ-Net".
    self.avgpool = nn.Sequential(
        nn.AvgPool2d(7, stride=1),
        ActQ(nbits=nbits_a),
    )
    self.fc = LinearQ(512 * block.expansion, num_classes, nbits=nbits_w)

    # Re-initialise conv weights from N(0, sqrt(2 / n)) with
    # n = kernel_h * kernel_w * out_channels, and reset batch-norm layers
    # to weight 1 / bias 0.
    for module in self.modules():
        if isinstance(module, nn.Conv2d):
            kernel_h = module.kernel_size[0]
            kernel_w = module.kernel_size[1]
            fan_out = kernel_h * kernel_w * module.out_channels
            module.weight.data.normal_(0, math.sqrt(2. / fan_out))
            continue
        if isinstance(module, nn.BatchNorm2d):
            module.weight.data.fill_(1)
            module.bias.data.zero_()
def __init__(self, vgg_name, nbits_w=4, nbits_a=4, q_mode=Qmodes.kernel_wise, l2=True):
    """Assemble the feature stack and a single-layer quantized classifier.

    Args:
        vgg_name: Key used to look up the layer configuration in the
            externally defined ``cfg`` table. The value ``'VGG7'`` widens
            the classifier input by a factor of 16.
        nbits_w: Forwarded to ``_make_layers`` and passed as ``nbits`` to
            the classifier's ``LinearQ``.
        nbits_a: Forwarded to ``_make_layers``.
        q_mode: Forwarded to ``_make_layers``.
        l2: Stored on the instance and passed as ``l2`` to ``LinearQ``.
    """
    super(VGGQFN, self).__init__()
    self.l2 = l2
    self.features = self._make_layers(
        cfg[vgg_name], nbits_w=nbits_w, nbits_a=nbits_a, q_mode=q_mode)

    # Disabled in the original (kept for reference):
    # self.last_actq = ActQ(nbits=-1 if max(nbits_a, nbits_w) <= 0 else nbits_a * 2, l2=self.l2)

    scale = 16 if vgg_name == 'VGG7' else 1
    output_layer = LinearQ(512 * scale, 10, nbits=nbits_w, l2=l2)
    self.classifier = nn.Sequential(output_layer)
def __init__(self, vgg_name, nbits_w=4, nbits_a=4, q_mode=Qmodes.kernel_wise, l2=True):
    """Assemble the feature stack and a PACT + LinearQ classifier.

    Args:
        vgg_name: Key used to look up the layer configuration in the
            externally defined ``cfg`` table. The value ``'VGG7'`` widens
            the classifier input by a factor of 16.
        nbits_w: Forwarded to ``_make_layers`` and passed as ``nbits`` to
            the classifier's ``LinearQ``.
        nbits_a: Forwarded to ``_make_layers`` and passed as ``nbits`` to
            the classifier's ``PACT`` module.
        q_mode: Forwarded to ``_make_layers``.
        l2: Stored on the instance and passed as ``l2`` to ``LinearQ``.
    """
    super(VGGQFN_PACT, self).__init__()
    self.l2 = l2
    self.features = self._make_layers(
        cfg[vgg_name], nbits_w=nbits_w, nbits_a=nbits_a, q_mode=q_mode)

    scale = 16 if vgg_name == 'VGG7' else 1
    head = [
        PACT(nbits=nbits_a, inplace=False),
        LinearQ(512 * scale, 10, nbits=nbits_w, l2=l2),
    ]
    self.classifier = nn.Sequential(*head)
def __init__(self, num_classes=1000, nbits_w=4, nbits_a=4, q_mode=Qmodes.kernel_wise):
    """Build the AlexNet-style feature stack, classifier and expanded output layer.

    Args:
        num_classes: Output width of ``expand_fc`` (fc8).
        nbits_w: Stored on the instance and passed as ``nbits`` to every
            ``Conv2dQ`` and ``LinearQ``.
        nbits_a: Stored on the instance and passed as ``nbits`` to the
            intermediate ``ActQ`` modules.
        q_mode: Stored on the instance and passed as ``mode`` to every
            ``Conv2dQ``.
    """
    super(AlexNetQFI, self).__init__()
    self.nbits_w = nbits_w
    self.nbits_a = nbits_a
    self.q_mode = q_mode

    # Used by the first and the last ActQ: -1 when neither bit width is
    # positive, otherwise 8.
    edge_act_bits = -1 if max(nbits_a, nbits_w) <= 0 else 8
    conv_opts = dict(nbits=nbits_w, mode=q_mode)

    feature_layers = [
        ActQ(nbits=edge_act_bits, signed=True),
        Conv2dQ(3, 64, kernel_size=11, stride=4, padding=2, **conv_opts),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQ(nbits=nbits_a),
        Conv2dQ(64, 192, kernel_size=5, padding=2, **conv_opts),  # conv2
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQ(nbits=nbits_a),
        Conv2dQ(192, 384, kernel_size=3, padding=1, **conv_opts),  # conv3
        nn.ReLU(inplace=True),
        ActQ(nbits=nbits_a),
        Conv2dQ(384, 256, kernel_size=3, padding=1, **conv_opts),  # conv4
        nn.ReLU(inplace=True),
        ActQ(nbits=nbits_a),
        Conv2dQ(256, 256, kernel_size=3, padding=1, **conv_opts),  # conv5
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQ(nbits=nbits_a),
    ]
    self.features = nn.Sequential(*feature_layers)

    # NOTE: the nn.Dropout() layers of this head are disabled (they were
    # commented out in the original layer list).
    classifier_layers = [
        LinearQ(256 * 6 * 6, 4096, nbits=nbits_w),  # fc6
        nn.ReLU(inplace=True),
        ActQ(nbits=nbits_a),
        LinearQ(4096, 4096, nbits=nbits_w),  # fc7
        nn.ReLU(inplace=True),
        ActQ(nbits=edge_act_bits, expand=True),
    ]
    self.classifier = nn.Sequential(*classifier_layers)

    # Disabled alternatives from the original (kept for reference):
    # self.shared_fc = LinearQ(4096, num_classes, nbits=nbits_w)
    # self.last_add = EltwiseAdd(inplace=True)
    # fc8 takes twice the fc7 width; presumably because the preceding
    # ActQ(expand=True) doubles the feature dimension -- TODO confirm.
    self.expand_fc = LinearQ(4096 * 2, num_classes, nbits=nbits_w)  # fc8
def __init__(self, num_classes=1000, nbits_w=4, nbits_a=4, q_mode=Qmodes.kernel_wise, l2=True):
    """Build the AlexNet-style feature stack and a fully quantized classifier.

    Args:
        num_classes: Output width of the last ``LinearQ`` (fc8).
        nbits_w: Passed as ``nbits`` to every ``Conv2dQ`` and ``LinearQ``.
        nbits_a: Passed as ``nbits`` to the intermediate ``ActQ`` modules.
        q_mode: Passed as ``mode`` to the ``Conv2dQ`` layers only; the
            ``LinearQ`` layers always receive ``Qmodes.layer_wise``.
        l2: Passed as ``l2`` to every quantized module created here.
    """
    super(AlexNetQFN, self).__init__()

    # Keyword bundles shared by the repeated layer types.
    conv_opts = dict(nbits=nbits_w, mode=q_mode, l2=l2)
    act_opts = dict(nbits=nbits_a, l2=l2)

    # The input ActQ gets nbits=-1 when neither bit width is positive,
    # otherwise 8.
    input_act_bits = -1 if max(nbits_a, nbits_w) <= 0 else 8

    feature_layers = [
        ActQ(nbits=input_act_bits, signed=True, l2=l2),
        Conv2dQ(3, 64, kernel_size=11, stride=4, padding=2, **conv_opts),  # conv1
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQ(**act_opts),
        Conv2dQ(64, 192, kernel_size=5, padding=2, **conv_opts),  # conv2
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQ(**act_opts),
        Conv2dQ(192, 384, kernel_size=3, padding=1, **conv_opts),  # conv3
        nn.ReLU(inplace=True),
        ActQ(**act_opts),
        Conv2dQ(384, 256, kernel_size=3, padding=1, **conv_opts),  # conv4
        nn.ReLU(inplace=True),
        ActQ(**act_opts),
        Conv2dQ(256, 256, kernel_size=3, padding=1, **conv_opts),  # conv5
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        ActQ(**act_opts),
    ]
    self.features = nn.Sequential(*feature_layers)

    fc_opts = dict(nbits=nbits_w, mode=Qmodes.layer_wise, l2=l2)
    # NOTE: the nn.Dropout() layers of this head are disabled (they were
    # commented out in the original layer list).
    classifier_layers = [
        LinearQ(256 * 6 * 6, 4096, **fc_opts),  # fc6
        nn.ReLU(inplace=True),
        ActQ(**act_opts),
        LinearQ(4096, 4096, **fc_opts),  # fc7
        nn.ReLU(inplace=True),
        ActQ(**act_opts),  # key layer
        LinearQ(4096, num_classes, **fc_opts),  # fc8
    ]
    self.classifier = nn.Sequential(*classifier_layers)
def __init__(self, num_classes=1000, nbits_w=4, nbits_a=4, q_mode=Qmodes.kernel_wise, l2=True):
    """Build the AlexNet-style feature stack and classifier using PACT activations.

    The first convolution (conv1) and the last linear layer (fc8) are plain
    ``nn.Conv2d`` / ``nn.Linear`` modules; the layers in between are
    ``Conv2dQ`` / ``LinearQ``.

    Args:
        num_classes: Output width of the final ``nn.Linear`` (fc8).
        nbits_w: Passed as ``nbits`` to every ``Conv2dQ`` and ``LinearQ``.
        nbits_a: Passed as ``nbits`` to every ``PACT`` module.
        q_mode: Passed as ``mode`` to the ``Conv2dQ`` layers only; the
            ``LinearQ`` layers always receive ``Qmodes.layer_wise``.
        l2: Passed as ``l2`` to every ``Conv2dQ`` and ``LinearQ``.
    """
    super(AlexNetQPACT, self).__init__()
    self.features = nn.Sequential(
        nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),  # conv1
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        PACT(nbits=nbits_a),
        Conv2dQ(64, 192, kernel_size=5, padding=2, nbits=nbits_w, mode=q_mode, l2=l2),  # conv2
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        PACT(nbits=nbits_a),
        Conv2dQ(192, 384, kernel_size=3, padding=1, nbits=nbits_w, mode=q_mode, l2=l2),  # conv3
        nn.ReLU(inplace=True),
        PACT(nbits=nbits_a),
        Conv2dQ(384, 256, kernel_size=3, padding=1, nbits=nbits_w, mode=q_mode, l2=l2),  # conv4
        nn.ReLU(inplace=True),
        PACT(nbits=nbits_a),
        Conv2dQ(256, 256, kernel_size=3, padding=1, nbits=nbits_w, mode=q_mode, l2=l2),  # conv5
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=3, stride=2),
        PACT(nbits=nbits_a),
    )
    self.classifier = nn.Sequential(
        nn.Dropout(),
        # As the experiment results show, there is no difference between
        # layer-wise and kernel-wise quantization for these layers.
        LinearQ(256 * 6 * 6, 4096, nbits=nbits_w, mode=Qmodes.layer_wise, l2=l2),  # fc6
        nn.ReLU(inplace=True),
        PACT(nbits=nbits_a),
        nn.Dropout(),
        # Use the Qmodes class directly (as fc6 does) rather than reaching
        # the member through the ``q_mode`` argument: member-via-member
        # access is interpreter-version dependent for enums and fails for
        # any ``q_mode`` value that has no ``layer_wise`` attribute.
        LinearQ(4096, 4096, nbits=nbits_w, mode=Qmodes.layer_wise, l2=l2),  # fc7
        nn.ReLU(inplace=True),
        nn.Linear(4096, num_classes),  # fc8
    )