def __init__(self, quant, num_classes=10, depth=110):
    super(PreResNet, self).__init__()
    assert (depth - 2) % 6 == 0, 'depth should be 6n+2'
    n = (depth - 2) // 6

    block = Bottleneck if depth >= 44 else BasicBlock

    self.inplanes = 16
    self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False)
    self.layer1 = self._make_layer(block, 16, n, quant)
    self.layer2 = self._make_layer(block, 32, n, quant, stride=2)
    self.layer3 = self._make_layer(block, 64, n, quant, stride=2)
    self.bn = nn.BatchNorm2d(64 * block.expansion)
    self.relu = nn.ReLU(inplace=True)
    self.avgpool = nn.AvgPool2d(8)
    self.fc = nn.Linear(64 * block.expansion, num_classes)

    # `quant` is a zero-argument factory returning a fresh quantization module;
    # the IBM-half format (6 exponent bits, 9 mantissa bits) is used at the network boundary.
    self.quant = quant()
    IBM_half = FloatingPoint(exp=6, man=9)
    self.quant_half = Quantizer(IBM_half, IBM_half, "nearest", "nearest")

    # He-style initialization for conv weights; unit scale / zero shift for batch norm.
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
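
# Usage sketch (an illustrative assumption, not part of the original source): the `quant`
# argument must be a zero-argument callable that builds a new quantizer module each time
# it is invoked, since the constructor calls `quant()` directly. The activation format
# below is hypothetical; FloatingPoint and Quantizer come from QPyTorch's `qtorch` package,
# as in the surrounding code.
from qtorch import FloatingPoint
from qtorch.quant import Quantizer

act_number = FloatingPoint(exp=8, man=7)  # assumed activation format (bfloat16-like)
make_quant = lambda: Quantizer(act_number, act_number, "nearest", "nearest")
model = PreResNet(quant=make_quant, num_classes=10, depth=110)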
def __init__(self, quant, num_classes, gamma=0.5, alpha=0.5, block_size=16):
    super(MobileNet, self).__init__()
    self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn1 = nn.BatchNorm2d(32)
    self.layers = self._make_layers(in_planes=32, stride=1, quant=quant,
                                    gamma=gamma, alpha=alpha, block_size=block_size)
    self.linear = nn.Linear(1024, num_classes)
    self.quant = quant()
    IBM_half = FloatingPoint(exp=6, man=9)
    self.quant_half = Quantizer(IBM_half, IBM_half, "nearest", "nearest")
def conv_bn(c_in, c_out, quant):
    IBM_half = FloatingPoint(exp=6, man=9)
    # A factory (not a module instance) is needed here, since `quant_half()` is called below,
    # mirroring the lambda pattern used in the VGG constructors.
    quant_half = lambda: Quantizer(IBM_half, IBM_half, "nearest", "nearest")
    return {
        'half_quant': quant_half(),
        'conv': nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1, bias=False),
        'quant': quant(),
        'bn': BatchNorm(c_out),
        'relu': nn.ReLU(True),
    }
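
# Minimal sketch (an assumption, not original code) of how the conv_bn dict might be
# consumed: its values, taken in insertion order, form one quant/conv/quant/bn/relu block
# that nn.Sequential can run. `make_quant` is a hypothetical activation-quantizer factory;
# conv_bn, BatchNorm, FloatingPoint, Quantizer, torch, and nn are assumed in scope as above.
act_fmt = FloatingPoint(exp=8, man=7)  # assumed activation format
make_quant = lambda: Quantizer(act_fmt, act_fmt, "nearest", "nearest")
block = nn.Sequential(*conv_bn(3, 64, make_quant).values())
out = block(torch.randn(1, 3, 32, 32))  # e.g. a CIFAR-sized input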
def __init__(self, quant=None, num_classes=10, depth=16, batch_norm=False):
    super(VGG, self).__init__()
    self.features = make_layers(cfg[depth], quant, batch_norm)
    IBM_half = FloatingPoint(exp=6, man=9)
    quant_half = lambda: Quantizer(IBM_half, IBM_half, "nearest", "nearest")
    self.classifier = nn.Sequential(
        quant_half(),
        nn.Dropout(),
        nn.Linear(512, 512),
        nn.ReLU(True),
        quant(),
        nn.Dropout(),
        nn.Linear(512, 512),
        nn.ReLU(True),
        quant(),
        nn.Linear(512, num_classes),
        quant_half(),
    )
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
            m.bias.data.zero_()
def __init__(self, quant=None, num_classes=10, depth=16, batch_norm=False,
             group_norm=False, gamma=0.5, alpha=0.5, block_size=16,
             cg_groups=1, cg_alpha=2.0, cg_threshold_init=-3.0):
    super(VGG, self).__init__()
    self.features = make_layers(cfg[depth], quant, batch_norm, group_norm,
                                gamma, alpha, block_size,
                                cg_threshold_init, cg_alpha, cg_groups)
    IBM_half = FloatingPoint(exp=6, man=9)
    quant_half = lambda: Quantizer(IBM_half, IBM_half, "nearest", "nearest")
    self.classifier = nn.Sequential(
        quant_half(),
        nn.Dropout(),
        Linear_TD(512, 512, gamma=gamma, alpha=alpha, block_size=block_size),
        nn.ReLU(True),
        quant(),
        nn.Dropout(),
        Linear_TD(512, 512, gamma=gamma, alpha=alpha, block_size=block_size),
        nn.ReLU(True),
        quant(),
        nn.Linear(512, num_classes),
        quant_half(),
    )
    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
            m.bias.data.zero_()
def conv_bn_TD(c_in, c_out, quant, gamma=0.0, alpha=0.0, block_size=16):
    IBM_half = FloatingPoint(exp=6, man=9)
    quant_half = Quantizer(IBM_half, IBM_half, "nearest", "nearest")  # defined but unused in this block
    return {
        'conv': Conv2d_TD(c_in, c_out, kernel_size=3, stride=1, padding=1, bias=False,
                          gamma=gamma, alpha=alpha, block_size=block_size),
        'quant1': quant(),
        'bn': BatchNorm(c_out),
        'relu': nn.ReLU(True),
        'quant2': quant(),
    }
    # build a low-precision quantizer for each number category named by `num`
    num_man = getattr(args, "{}_man".format(num))
    num_exp = getattr(args, "{}_exp".format(num))
    number = FloatingPoint(exp=num_exp, man=num_man)
    print("{}: {} rounding, {}".format(num, num_rounding, number))
    quantizers[num] = quantizer(forward_number=number, forward_rounding=num_rounding)

# Build model
print("Model: {}".format(args.model))
model_cfg = getattr(models, args.model)
if "LP" in args.model:
    activate_number = FloatingPoint(exp=args.activate_exp, man=args.activate_man)
    error_number = FloatingPoint(exp=args.error_exp, man=args.error_man)
    print("activation: {}, {}".format(args.activate_rounding, activate_number))
    print("error: {}, {}".format(args.error_rounding, error_number))
    # the model receives a factory so every layer can instantiate its own activation/error quantizer
    make_quant = lambda: Quantizer(activate_number, error_number,
                                   args.activate_rounding, args.error_rounding)
    model_cfg.kwargs.update({"quant": make_quant})

model = model_cfg.base(*model_cfg.args, num_classes=num_classes, **model_cfg.kwargs)
model.cuda()

criterion = F.cross_entropy
optimizer = SGD(
    model.parameters(),
    lr=args.lr_init,
    momentum=args.momentum,
    weight_decay=args.wd,
)
optimizer = OptimLP(
        batch_size=64, num_workers=1, pin_memory=True)
}

# We then define the quantization setting we are going to use. We define a low- and a
# high-precision format for different parts of the computation.

# In[4]:

# define a low-precision fixed-point format and a high-precision floating-point format
lowp = FixedPoint(wl=8, fl=7)
highp = FloatingPoint(exp=8, man=7)  # this is bfloat16

# define quantization functions
weight_quant = Quantizer(forward_number=lowp, backward_number=None,
                         forward_rounding="nearest", backward_rounding="nearest")
grad_quant = Quantizer(forward_number=lowp, backward_number=None,
                       forward_rounding="nearest", backward_rounding="stochastic")
momentum_quant = Quantizer(forward_number=highp, backward_number=None,
                           forward_rounding="nearest", backward_rounding="stochastic")
acc_quant = Quantizer(forward_number=highp, backward_number=None,
                      forward_rounding="nearest", backward_rounding="nearest")

# define a lambda function so that the Quantizer module can be duplicated easily
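
# A minimal sketch of such a lambda (the name `act_error_quant` and the exact format/rounding
# choices below are illustrative assumptions, not taken from the original): each call returns
# a fresh Quantizer module, so the same activation/error quantization setting can be inserted
# at many points in a model without sharing one module instance.
act_error_quant = lambda: Quantizer(forward_number=lowp, backward_number=lowp,
                                    forward_rounding="nearest", backward_rounding="stochastic")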
def __init__(self, quant, depth, num_classes, gamma=0.5, alpha=0.5,
             block_size=16, non_uni_sparse=True, threshold=0.0):
    """Constructor.

    Args:
        quant: factory returning a quantization module.
        depth: number of layers.
        num_classes: number of classes.
    """
    super(CifarResNet, self).__init__()

    block = ResNetBasicblock

    # Model type specifies the number of layers for the CIFAR-10 and CIFAR-100 models.
    assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110'
    layer_blocks = (depth - 2) // 6
    print('CifarResNet : Depth : {} , Layers for each block : {}'.format(depth, layer_blocks))

    self.non_uni_sparse = non_uni_sparse
    self.threshold = threshold
    self.num_classes = num_classes

    self.conv_1_3x3 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
    self.bn_1 = nn.BatchNorm2d(16)

    self.inplanes = 16
    self.stage_1 = self._make_layer(block, 16, layer_blocks, quant, 1,
                                    gamma=gamma, alpha=alpha, block_size=block_size)
    self.stage_2 = self._make_layer(block, 32, layer_blocks, quant, 2,
                                    gamma=gamma, alpha=alpha, block_size=block_size)
    self.stage_3 = self._make_layer(block, 64, layer_blocks, quant, 2,
                                    gamma=gamma, alpha=alpha, block_size=block_size)
    self.avgpool = nn.AvgPool2d(8)
    self.classifier = nn.Linear(64 * block.expansion, num_classes)
    self.quant = quant()
    IBM_half = FloatingPoint(exp=6, man=9)
    self.quant_half = Quantizer(IBM_half, IBM_half, "nearest", "nearest")

    for m in self.modules():
        if isinstance(m, nn.Conv2d):
            n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            m.weight.data.normal_(0, math.sqrt(2. / n))
            # m.bias.data.zero_()
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
        elif isinstance(m, nn.Linear):
            init.kaiming_normal_(m.weight)
            m.bias.data.zero_()