def forward(self, x):
    """Quantized forward pass of a three-conv residual (bottleneck-style) block.

    Each conv stage follows the same fake-quantization recipe: quantize the
    incoming activations, quantize the weight (returning an integer-like
    tensor plus its scale), pre-scale the quantized bias so the convolution
    runs entirely in the quantized domain, then multiply the result back by
    the weight scale. The shortcut path is quantized the same way when
    ``self.downsample`` is set.

    NOTE(review): the original body also ran ``self.conv1(x)``,
    ``self.conv2(out)``, ``self.conv3(out)`` and ``self.shortcut(residual)``
    and discarded the results — pure wasted compute (plain conv/ReLU modules,
    no visible side effects), removed here.

    BUG FIX: ``conv3_bias`` was divided by ``conv2_scale`` instead of
    ``conv3_scale``, inconsistent with every other stage in this block.
    """
    x = quantize_activations_gemm_A(x, self.act_scale[0])
    residual = x

    # Stage 1: quantized conv1 + ReLU, then rescale activations.
    conv1_weight, conv1_scale = quantize_weight_gemm_S(self.conv1.weight)
    conv1_bias = quantize_bias_gemm(
        self.conv1.bias) / (conv1_scale * self.bias_scale[0])
    out = F.conv2d(x, conv1_weight, conv1_bias) * conv1_scale
    out = self.relu1(out)
    out = out * self.act_scale[0]

    # Stage 2: strided quantized conv2 + ReLU.
    out = quantize_activations_gemm_A(out, self.act_scale[1])
    conv2_weight, conv2_scale = quantize_weight_gemm_S(self.conv2.weight)
    conv2_bias = quantize_bias_gemm(
        self.conv2.bias) / (conv2_scale * self.bias_scale[1])
    out = F.conv2d(
        out, conv2_weight, conv2_bias,
        stride=self.stride, padding=1) * conv2_scale
    out = self.relu2(out)
    out = out * self.act_scale[1]

    # Stage 3: quantized conv3 (no ReLU before the residual add).
    out = quantize_activations_gemm_A(out, self.act_scale[2])
    conv3_weight, conv3_scale = quantize_weight_gemm_S(self.conv3.weight)
    # Fixed: was conv2_scale, which mismatched conv3's own weight scale.
    conv3_bias = quantize_bias_gemm(
        self.conv3.bias) / (conv3_scale * self.bias_scale[2])
    out = F.conv2d(out, conv3_weight, conv3_bias, padding=1) * conv3_scale
    out = out * self.act_scale[2]

    # Shortcut path: quantize the projection conv when downsampling.
    if self.downsample:
        short_weight, short_scale = quantize_weight_gemm_S(
            self.shortcut[0].weight)
        short_bias = quantize_bias_gemm(
            self.shortcut[0].bias) / (short_scale * self.act_scale[0])
        residual = F.conv2d(
            residual, short_weight, short_bias,
            stride=self.stride) * short_scale
        residual = residual * self.act_scale[0]

    out += residual
    out = self.relu(out)
    return out
def forward(self, input):
    """Run a fake-quantized linear layer.

    The weight is quantized to (qweight, scale); the bias, when present, is
    divided by the weight scale before quantization so the matmul can be
    performed in the quantized domain and rescaled once at the end.
    """
    qweight, scale = quantize_weight_gemm_S(self.weight)
    qbias = None if self.bias is None else quantize_bias_gemm(self.bias / scale)
    qinput = quantize_activations_gemm(input)
    return F.linear(qinput, qweight, qbias) * scale
def forward(self, input):
    """Run a fake-quantized 2D convolution.

    Mirrors the quantized linear layer: weight -> (qweight, scale), bias
    pre-divided by the weight scale before quantization, activations
    quantized, and the conv output multiplied back by the scale.
    """
    qweight, scale = quantize_weight_gemm_S(self.weight)
    if self.bias is None:
        qbias = None
    else:
        qbias = quantize_bias_gemm(self.bias / scale)
    qinput = quantize_activations_gemm(input)
    out = F.conv2d(qinput, qweight, qbias, self.stride,
                   self.padding, self.dilation, self.groups)
    return out * scale
def forward(self, x):
    """Quantized conv + ReLU with a manually broadcast bias.

    The bias is kept out of ``F.conv2d`` and instead expanded to a
    per-channel spatial map and added afterwards, so the whole
    (conv + bias) sum is rescaled by ``conv_scale`` in one step.

    NOTE(review): the original body also ran ``self.conv(x1)`` and discarded
    the result — pure wasted compute with no visible side effect, removed.
    Assumes the input is square (h = x.size(-1)) and the stride-1/padding-1
    conv preserves the spatial size — TODO confirm with callers.
    """
    x1 = quantize_activations_gemm_A(x, self.act_scale)
    conv_weight, conv_scale = quantize_weight_gemm_S(self.conv.weight)
    conv_bias = quantize_bias_gemm(
        self.conv.bias / (conv_scale * self.bias_scale))
    # Broadcast per-channel bias over the spatial dims:
    # (C,) -> repeat -> (h, h, C) -> transpose(0, 2) -> (C, h, h).
    h = x.size()[-1]
    bias1 = conv_bias.repeat(h, h, 1)
    bias = bias1.transpose(0, 2)
    out = (F.conv2d(x1, conv_weight, stride=1, padding=1) + bias) * conv_scale
    out = self.relu(out)
    out = out * self.act_scale
    return out
def forward(self, x):
    """Network-level forward: quantized stem conv, four residual stages,
    global average pooling, and the final (unquantized here) linear head.

    NOTE(review): the original body also ran ``self.conv1(x)`` and discarded
    the result — pure wasted compute with no visible side effect, removed.
    Unlike the block-level convs, the stem bias is divided only by
    ``conv1_scale`` (no separate bias scale) — presumably intentional for
    the first layer; verify against the quantization scheme.
    """
    x = quantize_activations_gemm_A(x, self.scale)
    conv1_weight, conv1_scale = quantize_weight_gemm_S(self.conv1.weight)
    conv1_bias = quantize_bias_gemm(self.conv1.bias) / conv1_scale
    x = F.conv2d(x, conv1_weight, conv1_bias,
                 stride=1, padding=1) * conv1_scale
    x = self.relu1(x)
    x = x * self.scale
    x = self.layer1(x)
    x = self.layer2(x)
    x = self.layer3(x)
    x = self.layer4(x)
    x = F.avg_pool2d(x, 4)
    x = x.view(x.size(0), -1)
    x = self.linear(x)
    return x