def h_swish(x):
    """Hard Swish (MobileNetV3): https://arxiv.org/pdf/1905.02244.pdf"""
    return x * F.relu6(x + 3) / 6
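# The hard-swish form above recurs throughout this file. As a quick sanity
# check (a minimal sketch, assuming PyTorch >= 1.6, where F.hardswish is
# available), the expression matches the built-in implementation numerically:
import torch
import torch.nn.functional as F

x = torch.randn(8)
assert torch.allclose(x * F.relu6(x + 3) / 6, F.hardswish(x))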
def forward(self, x):
    return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0
def hard_sigmoid(x, inplace: bool = False):
    if inplace:
        return x.add_(3.).clamp_(0., 6.).div_(6.)
    else:
        return F.relu6(x + 3.) / 6.
def hard_sigmoid(x, inplace=False):
    return F.relu6(x + 3, inplace) / 6
def hard_swish(input, inplace=False):
    # NOTE: `inplace` is accepted but unused here.
    return input * F.relu6(input + 3.).div(6.)
def forward(self, x):
    out = F.relu6(x + 3., self.inplace) / 6.
    return out * x
def forward(self, input):
    # Training mode
    if self.training:
        self.step += 1
        if self.bn:
            # First run a plain convolution to obtain activations A, from which the BN statistics are taken
            output = F.conv2d(input=input, weight=self.weight, bias=self.bias,
                              stride=self.stride, padding=self.padding,
                              dilation=self.dilation, groups=self.groups)
            # Update the BN statistics (batch and running)
            dims = [dim for dim in range(4) if dim != 1]
            self.batch_mean = torch.mean(output, dim=dims)
            self.batch_var = torch.var(output, dim=dims)
            with torch.no_grad():
                if self.first_bn == 0 and torch.equal(
                        self.running_mean, torch.zeros_like(self.running_mean)) and torch.equal(
                        self.running_var, torch.zeros_like(self.running_var)):
                    self.first_bn.add_(1)
                    self.running_mean.add_(self.batch_mean)
                    self.running_var.add_(self.batch_var)
                else:
                    self.running_mean.mul_(1 - self.momentum).add_(self.momentum * self.batch_mean)
                    self.running_var.mul_(1 - self.momentum).add_(self.momentum * self.batch_var)
            # BN folding
            if self.step < self.freeze_step:
                if self.bias is not None:
                    bias = reshape_to_bias(
                        self.beta + (self.bias - self.batch_mean) *
                        (self.gamma / torch.sqrt(self.batch_var + self.eps)))
                else:
                    bias = reshape_to_bias(
                        self.beta - self.batch_mean *
                        (self.gamma / torch.sqrt(self.batch_var + self.eps)))  # fold batch stats into bias
                weight = self.weight * reshape_to_weight(
                    self.gamma / torch.sqrt(self.batch_var + self.eps))  # fold batch stats into weight
            else:
                if self.bias is not None:
                    bias = reshape_to_bias(
                        self.beta + (self.bias - self.running_mean) *
                        (self.gamma / torch.sqrt(self.running_var + self.eps)))
                else:
                    bias = reshape_to_bias(
                        self.beta - self.running_mean *
                        (self.gamma / torch.sqrt(self.running_var + self.eps)))  # fold running stats into bias
                weight = self.weight * reshape_to_weight(
                    self.gamma / torch.sqrt(self.running_var + self.eps))  # fold running stats into weight
        else:
            bias = self.bias
            weight = self.weight
    # Inference mode
    else:
        # print(self.running_mean, self.running_var)
        # BN folding
        if self.bn:
            if self.bias is not None:
                bias = reshape_to_bias(
                    self.beta + (self.bias - self.running_mean) *
                    (self.gamma / torch.sqrt(self.running_var + self.eps)))
            else:
                bias = reshape_to_bias(
                    self.beta - self.running_mean *
                    (self.gamma / torch.sqrt(self.running_var + self.eps)))  # fold running stats into bias
            weight = self.weight * reshape_to_weight(
                self.gamma / torch.sqrt(self.running_var + self.eps))  # fold running stats into weight
        else:
            bias = self.bias
            weight = self.weight

    # Quantize activations A and the BN-folded weights W
    q_weight = self.weight_quantizer(weight)
    q_bias = self.bias_quantizer(bias)

    if self.quantizer_output == True:  # dump the quantization parameters to .txt files
        # Create the quantizer output directories
        if not os.path.isdir('./quantier_output'):
            os.makedirs('./quantier_output')
        if not os.path.isdir('./quantier_output/q_weight_out'):
            os.makedirs('./quantier_output/q_weight_out')
        if not os.path.isdir('./quantier_output/w_scale_out'):
            os.makedirs('./quantier_output/w_scale_out')
        if not os.path.isdir('./quantier_output/q_weight_max'):
            os.makedirs('./quantier_output/q_weight_max')
        if not os.path.isdir('./quantier_output/max_weight_count'):
            os.makedirs('./quantier_output/max_weight_count')

        # Dump the weight quantization scale of the current layer
        weight_scale = self.weight_quantizer.get_scale()
        np.savetxt(('./quantier_output/w_scale_out/scale %f.txt' % time.time()),
                   weight_scale, delimiter='\n')
        # Dump the quantized weights of the current layer
        q_weight_txt = self.weight_quantizer.get_quantize_value(weight)
        q_weight_txt = np.array(q_weight_txt.cpu()).reshape(1, -1)
        q_weight_max = [np.max(q_weight_txt)]
        # q_weight_max = np.argmax(q_weight_txt)
        max_weight_count = [np.sum(abs(q_weight_txt) >= 255)]  # count of overflowing weights in this layer
        np.savetxt(('./quantier_output/max_weight_count/max_weight_count %f.txt' % time.time()),
                   max_weight_count)
        np.savetxt(('./quantier_output/q_weight_max/max_weight %f.txt' % time.time()),
                   q_weight_max)
        np.savetxt(('./quantier_output/q_weight_out/weight %f.txt' % time.time()),
                   q_weight_txt, delimiter='\n')
        # io.savemat('save.mat', {'q_weight_txt': q_weight_txt})

        # Create the directories for the bias dumps
        if not os.path.isdir('./quantier_output/q_bias_out'):
            os.makedirs('./quantier_output/q_bias_out')
        if not os.path.isdir('./quantier_output/b_scale_out'):
            os.makedirs('./quantier_output/b_scale_out')
        # Dump the bias quantization scale of the current layer
        bias_scale = self.bias_quantizer.get_scale()
        np.savetxt(('./quantier_output/b_scale_out/scale %f.txt' % time.time()),
                   bias_scale, delimiter='\n')
        # Dump the quantized bias of the current layer
        q_bias_txt = self.bias_quantizer.get_quantize_value(bias)
        q_bias_txt = np.array(q_bias_txt.cpu()).reshape(1, -1)
        np.savetxt(('./quantier_output/q_bias_out/bias %f.txt' % time.time()),
                   q_bias_txt, delimiter='\n')

    # Quantized convolution
    if self.training:  # training mode
        output = F.conv2d(input=input, weight=q_weight,
                          # bias=self.bias,  # note: originally no bias here (self.bias is None)
                          bias=q_bias,
                          stride=self.stride, padding=self.padding,
                          dilation=self.dilation, groups=self.groups)
    else:  # inference mode
        output = F.conv2d(input=input, weight=q_weight,
                          bias=q_bias,  # note: bias is added here to realize the full conv+BN
                          stride=self.stride, padding=self.padding,
                          dilation=self.dilation, groups=self.groups)

    if self.activate == 'leaky':
        output = F.leaky_relu(output, 0.125, inplace=True)
    elif self.activate == 'relu6':
        output = F.relu6(output, inplace=True)
    elif self.activate == 'h_swish':
        output = output * (F.relu6(output + 3.0, inplace=True) / 6.0)
    elif self.activate == 'relu':
        output = F.relu(output, inplace=True)
    elif self.activate == 'mish':
        output = output * F.softplus(output).tanh()
    elif self.activate == 'linear':
        return output
        # pass
    else:
        print(self.activate + " is not supported !")

    if self.quantizer_output == True:
        if not os.path.isdir('./quantier_output/q_activation_out'):
            os.makedirs('./quantier_output/q_activation_out')
        if not os.path.isdir('./quantier_output/a_scale_out'):
            os.makedirs('./quantier_output/a_scale_out')
        if not os.path.isdir('./quantier_output/q_activation_max'):
            os.makedirs('./quantier_output/q_activation_max')
        if not os.path.isdir('./quantier_output/max_activation_count'):
            os.makedirs('./quantier_output/max_activation_count')
        # Dump the activation quantization scale of the current layer
        activation_scale = self.activation_quantizer.get_scale()
        np.savetxt(('./quantier_output/a_scale_out/scale %f.txt' % time.time()),
                   activation_scale, delimiter='\n')
        # Dump the quantized activations of the current layer
        q_activation_txt = self.activation_quantizer.get_quantize_value(output)
        q_activation_txt = np.array(q_activation_txt.cpu()).reshape(1, -1)
        q_activation_max = [np.max(q_activation_txt)]  # layer max (to check for overflow)
        max_activation_count = [np.sum(abs(q_activation_txt) >= 255)]  # count of overflowing activations
        # q_weight_max = np.argmax(q_weight_txt)
        np.savetxt(('./quantier_output/max_activation_count/max_activation_count %f.txt' % time.time()),
                   max_activation_count)
        np.savetxt(('./quantier_output/q_activation_max/max_activation %f.txt' % time.time()),
                   q_activation_max)
        np.savetxt(('./quantier_output/q_activation_out/activation %f.txt' % time.time()),
                   q_activation_txt, delimiter='\n')

    output = self.activation_quantizer(output)
    return output
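# For reference, the BN folding used above follows the standard conv+BN
# fusion identities: w_fold = w * gamma / sqrt(var + eps) and
# b_fold = beta + (b - mean) * gamma / sqrt(var + eps). A self-contained
# sketch of what reshape_to_weight/reshape_to_bias accomplish (names and
# shapes here are illustrative, not the project's actual helpers):
import torch

def fold_bn(weight, bias, gamma, beta, mean, var, eps=1e-5):
    # Per-output-channel scale, broadcast over (out_ch, in_ch, kH, kW).
    scale = gamma / torch.sqrt(var + eps)
    w_fold = weight * scale.reshape(-1, 1, 1, 1)
    if bias is None:
        bias = torch.zeros_like(mean)
    b_fold = beta + (bias - mean) * scale
    return w_fold, b_fold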
def forward(self, x):
    pi1 = F.relu6(self.pi1(x))
    logits = self.pi2(pi1)
    v1 = F.relu6(self.v1(x))
    values = self.v2(v1)
    return logits, values
def relu6_forward_unquant(self, input):
    return F.relu6(input)
def forward(self, input):
    tic = time.time()
    output = F.relu6(input, self.inplace)
    toc = time.time()
    self.run_time = toc - tic
    return output
def relu6_forward(self, input):
    return qunsigned(F.relu6(input), self.acti_log2_t, self.acti_bit_width)
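# `qunsigned` is not defined in this snippet. A plausible minimal sketch,
# assuming it fake-quantizes a non-negative tensor to unsigned fixed point
# with a log2 dynamic-range parameter (hypothetical, not the original):
import torch

def qunsigned(x, log2_t, bit_width):
    # Map [0, 2**log2_t] onto {0, ..., 2**bit_width - 1}, round, clamp,
    # then dequantize back to floating point.
    n_levels = 2 ** bit_width - 1
    scale = (2.0 ** log2_t) / n_levels
    return torch.clamp(torch.round(x / scale), 0, n_levels) * scale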
def forward(self, x):
    if self.inplace:
        return x.add_(3.).clamp_(0., 6.).div_(6.)
    else:
        return F.relu6(x + 3.) / 6.
def hard_swish(x, inplace=False):
    if inplace:
        return x.mul_(F.relu6(x + 3.) / 6.)
    else:
        return x * F.relu6(x + 3.) / 6.
def forward(self, x):
    out = F.relu6(self.bn1(self.conv1(x)))
    out = F.relu6(self.bn2(self.conv2(out)))
    out = self.bn3(self.conv3(out))
    out = out + self.shortcut(x) if self.stride == 1 else out
    return out
def forward(self, x):
    return F.relu6(self.bn(self.conv(x)))
def forward(self, input):
    # Training mode
    if self.training:
        if self.bn:
            # First run a plain convolution to obtain activations A, from which the BN statistics are taken
            output = F.conv2d(input=input, weight=self.weight, bias=self.bias,
                              stride=self.stride, padding=self.padding,
                              dilation=self.dilation, groups=self.groups)
            # Update the BN statistics (batch and running)
            dims = [dim for dim in range(4) if dim != 1]
            self.batch_mean = torch.mean(output, dim=dims)
            self.batch_var = torch.var(output, dim=dims)
            with torch.no_grad():
                if self.first_bn == 0 and torch.equal(
                        self.running_mean, torch.zeros_like(self.running_mean)) and torch.equal(
                        self.running_var, torch.zeros_like(self.running_var)):
                    self.first_bn.add_(1)
                    self.running_mean.add_(self.batch_mean)
                    self.running_var.add_(self.batch_var)
                else:
                    self.running_mean.mul_(1 - self.momentum).add_(self.batch_mean * self.momentum)
                    self.running_var.mul_(1 - self.momentum).add_(self.batch_var * self.momentum)
            # BN folding
            if self.bias is not None:
                bias = reshape_to_bias(
                    self.beta + (self.bias - self.batch_mean) *
                    (self.gamma / torch.sqrt(self.batch_var + self.eps)))
            else:
                bias = reshape_to_bias(
                    self.beta - self.batch_mean *
                    (self.gamma / torch.sqrt(self.batch_var + self.eps)))  # fold batch stats into bias
            weight = self.weight * reshape_to_weight(
                self.gamma / torch.sqrt(self.batch_var + self.eps))  # fold batch stats into weight
            # if self.bias is not None:
            #     bias = reshape_to_bias(
            #         self.beta + (self.bias - self.running_mean) *
            #         (self.gamma / torch.sqrt(self.running_var + self.eps)))
            # else:
            #     bias = reshape_to_bias(
            #         self.beta - self.running_mean *
            #         (self.gamma / torch.sqrt(self.running_var + self.eps)))  # fold running stats into bias
            # weight = self.weight * reshape_to_weight(
            #     self.gamma / torch.sqrt(self.running_var + self.eps))  # fold running stats into weight
        else:
            bias = self.bias
            weight = self.weight
    # Inference mode
    else:
        # print(self.running_mean, self.running_var)
        if self.bn:
            # BN folding
            if self.bias is not None:
                bias = reshape_to_bias(
                    self.beta + (self.bias - self.running_mean) *
                    (self.gamma / torch.sqrt(self.running_var + self.eps)))
            else:
                bias = reshape_to_bias(
                    self.beta - self.running_mean * self.gamma /
                    torch.sqrt(self.running_var + self.eps))  # fold running stats into bias
            weight = self.weight * reshape_to_weight(
                self.gamma / torch.sqrt(self.running_var + self.eps))  # fold running stats into weight
        else:
            bias = self.bias
            weight = self.weight

    # Quantize activations A and the BN-folded weights W
    q_weight = self.weight_quantizer(weight)
    q_bias = self.bias_quantizer(bias)

    # Quantized convolution
    if self.training:  # training mode
        output = F.conv2d(input=input, weight=q_weight, bias=q_bias,
                          stride=self.stride, padding=self.padding,
                          dilation=self.dilation, groups=self.groups)
        # (In training mode this would convert the effect of folding running
        #  statistics into w back into the effect of folding batch statistics:
        #  running -> batch.)
        # if self.bn:
        #     output *= reshape_to_activation(
        #         torch.sqrt(self.running_var + self.eps) / torch.sqrt(self.batch_var + self.eps))
        #     output += reshape_to_activation(
        #         self.gamma * (self.running_mean / (self.running_var + self.eps) -
        #                       self.batch_mean / (self.batch_var + self.eps)))
        #     output += reshape_to_activation(bias)
    else:  # inference mode
        output = F.conv2d(input=input, weight=q_weight,
                          bias=q_bias,  # note: bias is added here to realize the full conv+BN
                          stride=self.stride, padding=self.padding,
                          dilation=self.dilation, groups=self.groups)

    if self.activate == 'leaky':
        output = F.leaky_relu(output, 0.125, inplace=True)
    elif self.activate == 'relu6':
        output = F.relu6(output, inplace=True)
    elif self.activate == 'h_swish':
        output = output * (F.relu6(output + 3.0, inplace=True) / 6.0)
    elif self.activate == 'relu':
        output = F.relu(output, inplace=True)
    elif self.activate == 'mish':
        output = output * F.softplus(output).tanh()
    elif self.activate == 'linear':
        # return output
        pass
    else:
        print(self.activate + " is not supported !")  # stray "%s" removed from the message

    output = self.activation_quantizer(output)
    return output
def forward(self, x):
    out = x * (F.relu6(x + 3) / 6)
    return out
def hard_sigmoid(x, inplace=True):
    # NOTE: `inplace` is accepted but unused here.
    relu = F.relu6(x + 3.0)
    return (1.0 / 6) * relu
def forward(self, x):
    out = F.relu6(x + 3) / 6
    return out
def hard_sigmoid(input, inplace=False):
    return F.relu6(input + 3).div(6.)
def prune_model_keep_size(model, prune_idx, CBL_idx, CBLidx2mask):
    pruned_model = deepcopy(model)
    activations = []
    for i, model_def in enumerate(model.module_defs):
        if model_def['type'] == 'convolutional' or model_def['type'] == 'depthwise' or model_def['type'] == 'se':
            activation = torch.zeros(int(model_def['filters'])).cuda()
            if i in prune_idx:
                mask = torch.from_numpy(CBLidx2mask[i]).cuda()
                bn_module = pruned_model.module_list[i][1]
                bn_module.weight.data.mul_(mask)
                if hasattr(pruned_model.module_list[i], 'activation'):
                    ac_module = pruned_model.module_list[i][2]
                    if ac_module.__class__.__name__ == "LeakyReLU":
                        activation = F.leaky_relu((1 - mask) * bn_module.bias.data, 0.1)
                    elif ac_module.__class__.__name__ == "ReLU6":
                        activation = F.relu6((1 - mask) * bn_module.bias.data, inplace=True)
                    elif ac_module.__class__.__name__ == "HardSwish":
                        x = (1 - mask) * bn_module.bias.data
                        activation = x * (F.relu6(x + 3.0, inplace=True) / 6.0)
                    elif ac_module.__class__.__name__ == "ReLU":
                        # F.relu takes no slope argument; the stray 0.1 was a bug
                        activation = F.relu((1 - mask) * bn_module.bias.data)
                    elif ac_module.__class__.__name__ == "Mish":
                        x = (1 - mask) * bn_module.bias.data
                        activation = x * F.softplus(x).tanh()
                    else:
                        activation = (1 - mask) * bn_module.bias.data
                else:
                    activation = (1 - mask) * bn_module.bias.data
                update_activation(i, pruned_model, activation, CBL_idx)
                bn_module.bias.data.mul_(mask)
            activations.append(activation)
        elif model_def['type'] == 'shortcut':
            actv1 = activations[i - 1]
            from_layer = int(model_def['from'][0])
            actv2 = activations[i + from_layer]
            activation = actv1 + actv2
            update_activation(i, pruned_model, activation, CBL_idx)
            activations.append(activation)
        elif model_def['type'] == 'route':
            # SPP is not pruned; its route layers need no update and act only as placeholders
            from_layers = [int(s) for s in model_def['layers']]
            activation = None
            if len(from_layers) == 1:
                activation = activations[i + from_layers[0] if from_layers[0] < 0 else from_layers[0]]
                if 'groups' in model_def:
                    activation = activation[(activation.shape[0] // 2):]
                update_activation(i, pruned_model, activation, CBL_idx)
            elif len(from_layers) == 2:
                actv1 = activations[i + from_layers[0]]
                actv2 = activations[i + from_layers[1] if from_layers[1] < 0 else from_layers[1]]
                activation = torch.cat((actv1, actv2))
                update_activation(i, pruned_model, activation, CBL_idx)
            activations.append(activation)
        elif model_def['type'] == 'upsample':
            # activation = torch.zeros(int(model.module_defs[i - 1]['filters'])).cuda()
            activations.append(activations[i - 1])
        elif model_def['type'] == 'yolo':
            activations.append(None)
        elif model_def['type'] == 'maxpool':
            # Distinguish SPP from tiny
            if model.module_defs[i + 1]['type'] == 'route':
                activations.append(None)
            else:
                activation = activations[i - 1]
                update_activation(i, pruned_model, activation, CBL_idx)
                activations.append(activation)
    return pruned_model
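# `CBLidx2mask` arrives as a dict of numpy 0/1 channel masks. A minimal
# sketch of how it might be built (hypothetical; assumes network-slimming
# style pruning, where channels whose |BN gamma| falls below a threshold
# are cut):
import numpy as np

def build_masks(model, prune_idx, threshold):
    cblidx2mask = {}
    for idx in prune_idx:
        bn_module = model.module_list[idx][1]  # BN sits at slot 1 of each CBL block
        gamma = bn_module.weight.data.abs().cpu().numpy()
        cblidx2mask[idx] = (gamma > threshold).astype(np.float32)
    return cblidx2mask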
def hard_swish(self, x, inplace):
    inner = F.relu6(x + 3.).div_(6.)
    return x.mul_(inner) if inplace else x.mul(inner)
def hard_swish(x):
    return x * F.relu6(x + 3.) / 6.
def backward_hook(module, grad_in, grad_out):
    # Clamp gradients flowing into ReLU modules to [0, 6]: relu6 cuts off
    # negative gradients and additionally caps them at 6.
    if isinstance(module, nn.ReLU):
        return (F.relu6(grad_in[0]), )
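# A minimal usage sketch for the hook above (assumes a standard nn.Module;
# register_full_backward_hook is the non-deprecated API and takes the same
# (module, grad_input, grad_output) hook signature):
import torch.nn as nn

def attach_relu_grad_clamp(model: nn.Module):
    handles = []
    for module in model.modules():
        if isinstance(module, nn.ReLU):
            handles.append(module.register_full_backward_hook(backward_hook))
    return handles  # keep the handles so the hooks can be remove()d later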
def hard_sigmoid(x):
    return F.relu6(x + 3.) / 6.
def forward(self, x):
    out = F.relu6(x + 3, inplace=True) / 6
    return out
def forward(self, x):
    return F.relu6(x + 3, inplace=self.inplace) / 6
def forward(self, x):
    return F.relu6(x + 3.0, inplace=True) / 6.0
def forward(self, x):
    pl_1 = F.relu6(self.policy_layer_1(x))
    policy = F.softmax(self.policy_layer_2(pl_1), dim=1)
    vl_1 = F.relu6(self.value_layer_1(x))
    value = self.value_layer_2(vl_1)
    return policy, value
def forward(self, input):
    if self.bn:
        # BN folding
        if self.bias is not None:
            bias = reshape_to_bias(
                self.beta + (self.bias - self.running_mean) *
                (self.gamma / torch.sqrt(self.running_var + self.eps)))
        else:
            bias = reshape_to_bias(
                self.beta - self.running_mean * self.gamma /
                torch.sqrt(self.running_var + self.eps))  # fold running stats into bias
        weight = self.weight * reshape_to_weight(
            self.gamma / torch.sqrt(self.running_var + self.eps))  # fold running stats into weight
    else:
        bias = self.bias
        weight = self.weight

    # Quantize activations A and the BN-folded weights W
    q_weight = self.weight_quantizer(weight)
    q_bias = self.bias_quantizer(bias)

    if self.quantizer_output == True:  # dump the quantization parameters to .txt files
        # Create the quantizer_output directories
        if not os.path.isdir('./quantizer_output'):
            os.makedirs('./quantizer_output')
        if not os.path.isdir('./quantizer_output/q_weight_out'):
            os.makedirs('./quantizer_output/q_weight_out')
        if not os.path.isdir('./quantizer_output/w_scale_out'):
            os.makedirs('./quantizer_output/w_scale_out')
        if not os.path.isdir('./quantizer_output/q_weight_max'):
            os.makedirs('./quantizer_output/q_weight_max')
        if not os.path.isdir('./quantizer_output/max_weight_count'):
            os.makedirs('./quantizer_output/max_weight_count')
        if not os.path.isdir('./quantizer_output/q_weight_reorder'):
            os.makedirs('./quantizer_output/q_weight_reorder')
        if not os.path.isdir('./quantizer_output/q_bias_reorder'):
            os.makedirs('./quantizer_output/q_bias_reorder')

        # Dump every layer (layer_idx == -1) or only the selected layer.
        # (The original duplicated the entire block below in an
        #  `elif int(self.name[1:4]) == self.layer_idx:` branch with an
        #  identical body; the two conditions are merged here.)
        if self.layer_idx == -1 or int(self.name[1:4]) == self.layer_idx:
            # Dump the weight quantization scale of the current layer (sign-flipped)
            weight_scale = -self.weight_quantizer.get_scale()
            np.savetxt(('./quantizer_output/w_scale_out/w_scale_%s' % self.name),
                       weight_scale, delimiter='\n')
            # Dump the quantized weights of the current layer
            q_weight_txt = self.weight_quantizer.get_quantize_value(weight)

            # Weight reordering
            w_para = q_weight_txt  # parameters to reorder
            if self.reorder == True:
                # print("use weights reorder!")
                shape_output = w_para.shape[0]
                shape_input = w_para.shape[1]
                num_TN = int(shape_input / self.TN)
                remainder_TN = shape_input % self.TN
                num_TM = int(shape_output / self.TM)
                remainder_TM = shape_output % self.TM
                first = True
                reorder_w_para = None
                if self.activate == 'linear':
                    print('layer-linear reorder!')
                    for k in range(num_TN):
                        temp = w_para[0:remainder_TM, k * self.TN:(k + 1) * self.TN, :, :]
                        temp = temp.view(temp.shape[0], temp.shape[1], temp.shape[2] * temp.shape[3])
                        temp = temp.permute(2, 0, 1).contiguous().view(-1)
                        if first:
                            reorder_w_para = temp.clone().cpu().data.numpy()
                            first = False
                        else:
                            reorder_w_para = np.append(reorder_w_para, temp.cpu().data.numpy())
                else:
                    for j in range(num_TM):
                        if shape_input == 3 or shape_input == 1:  # the first layer
                            print('The first layer~~~~~~~~~~~~')
                            temp = w_para[j * self.TM:(j + 1) * self.TM,
                                          num_TN * self.TN:num_TN * self.TN + remainder_TN, :, :]
                            temp = temp.view(temp.shape[0], temp.shape[1], temp.shape[2] * temp.shape[3])
                            fill = torch.zeros(self.TM, self.TN, temp.shape[2]).to(temp.device)
                            fill[:, 0:remainder_TN, :] = temp
                            temp = fill.permute(2, 0, 1).contiguous().view(-1)
                            if first:  # create the array for storage
                                reorder_w_para = temp.clone().cpu().data.numpy()
                                first = False
                            else:
                                reorder_w_para = np.append(reorder_w_para, temp.cpu().data.numpy())
                        else:
                            for k in range(num_TN):
                                temp = w_para[j * self.TM:(j + 1) * self.TM,
                                              k * self.TN:(k + 1) * self.TN, :, :]
                                # Merge into the TM*TN*(K^2) tensor layout of Fig. 10(a) in the paper
                                temp = temp.view(temp.shape[0], temp.shape[1],
                                                 temp.shape[2] * temp.shape[3])
                                # Convert to the reordered layout of Fig. 10(b)
                                temp = temp.permute(2, 0, 1).contiguous().view(-1)
                                if first:
                                    reorder_w_para = temp.clone().cpu().data.numpy()
                                    first = False
                                else:
                                    reorder_w_para = np.append(reorder_w_para, temp.cpu().data.numpy())

                w_para_flatten = reorder_w_para
                # print(reorder_w_para.size)
                # Verify the correctness of the reordering:
                '''if w_para_flatten.size == w_para.shape[0] * w_para.shape[1] * w_para.shape[2] * w_para.shape[3]:
                    print("weights convert correctly!")
                else:
                    print("weights convert mismatchingly!")'''
                q_weight_reorder = w_para_flatten
                q_weight_reorder = np.array(q_weight_reorder).reshape(1, -1)
                np.savetxt(('./quantizer_output/q_weight_reorder/w_reorder_%s.txt' % self.name),
                           q_weight_reorder, delimiter='\n')
            # End of weight reordering

            q_weight_txt = np.array(q_weight_txt.cpu()).reshape(1, -1)
            q_weight_max = [np.max(q_weight_txt)]
            # q_weight_max = np.argmax(q_weight_txt)
            max_weight_count = [np.sum(abs(q_weight_txt) >= (1 << (self.w_bits - 1)) - 1)]  # count of overflowing weights
            np.savetxt(('./quantizer_output/max_weight_count/max_w_count_%s.txt' % self.name),
                       max_weight_count)
            np.savetxt(('./quantizer_output/q_weight_max/max_w_%s.txt' % self.name), q_weight_max)
            np.savetxt(('./quantizer_output/q_weight_out/q_weight_%s.txt' % self.name),
                       q_weight_txt, delimiter='\n')
            # io.savemat('save.mat', {'q_weight_txt': q_weight_txt})

            # Create the directories for the bias dumps
            if not os.path.isdir('./quantizer_output/q_bias_out'):
                os.makedirs('./quantizer_output/q_bias_out')
            if not os.path.isdir('./quantizer_output/b_scale_out'):
                os.makedirs('./quantizer_output/b_scale_out')
            # Dump the bias quantization scale of the current layer (sign-flipped)
            bias_scale = -self.bias_quantizer.get_scale()
            np.savetxt(('./quantizer_output/b_scale_out/b_scale_%s.txt' % self.name),
                       bias_scale, delimiter='\n')
            # Dump the quantized bias of the current layer
            q_bias_txt = self.bias_quantizer.get_quantize_value(bias)
            q_bias_txt = np.array(q_bias_txt.cpu()).reshape(1, -1)
            np.savetxt(('./quantizer_output/q_bias_out/q_bias_%s.txt' % self.name),
                       q_bias_txt, delimiter='\n')

            # Bias reordering
            if self.reorder == True:
                b_para = np.zeros(2048, dtype=int)
                b_para[0:q_bias_txt.size] = q_bias_txt
                # print(b_para.shape)
                # b_para = np.array(b_para.cpu()).reshape(1, -1)
                np.savetxt(('./quantizer_output/q_bias_reorder/q_b_reorder_%s.txt' % self.name),
                           b_para, delimiter='\n')
                # Save the reordered weights and bias together as one binary file
                bias_weight_reorder = np.append(b_para, q_weight_reorder)
                wb_flat = bias_weight_reorder.astype(np.int8)
                writer = open('./quantizer_output/q_weight_reorder/%s_bias_weight_q_bin' % self.name, "wb")
                writer.write(wb_flat)
                writer.close()
            # End of bias reordering

    # Quantized convolution
    output = F.conv2d(input=input, weight=q_weight,
                      bias=q_bias,  # note: bias is added here to realize the full conv+BN
                      stride=self.stride, padding=self.padding,
                      dilation=self.dilation, groups=self.groups)

    if self.activate == 'leaky':
        output = F.leaky_relu(output, 0.125 if not self.maxabsscaler else 0.25, inplace=True)
    elif self.activate == 'relu6':
        output = F.relu6(output, inplace=True)
    elif self.activate == 'h_swish':
        output = output * (F.relu6(output + 3.0, inplace=True) / 6.0)
    elif self.activate == 'relu':
        output = F.relu(output, inplace=True)
    elif self.activate == 'mish':
        output = output * F.softplus(output).tanh()
    elif self.activate == 'linear':
        # return output
        pass
    else:
        print(self.activate + " is not supported !")  # stray "%s" removed from the message

    if self.quantizer_output == True:
        if not os.path.isdir('./quantizer_output/q_activation_out'):
            os.makedirs('./quantizer_output/q_activation_out')
        if not os.path.isdir('./quantizer_output/a_scale_out'):
            os.makedirs('./quantizer_output/a_scale_out')
        if not os.path.isdir('./quantizer_output/q_activation_max'):
            os.makedirs('./quantizer_output/q_activation_max')
        if not os.path.isdir('./quantizer_output/max_activation_count'):
            os.makedirs('./quantizer_output/max_activation_count')
        if not os.path.isdir('./quantizer_output/q_activation_reorder'):
            os.makedirs('./quantizer_output/q_activation_reorder')

        # As above, the original's two identical branches are merged here.
        if self.layer_idx == -1 or int(self.name[1:4]) == self.layer_idx:
            # Dump the activation quantization scale of the current layer (sign-flipped)
            activation_scale = -self.activation_quantizer.get_scale()
            np.savetxt(('./quantizer_output/a_scale_out/a_scale_%s.txt' % self.name),
                       activation_scale, delimiter='\n')
            # Dump the quantized activations of the current layer
            q_activation_txt = self.activation_quantizer.get_quantize_value(output)
            a_para = q_activation_txt

            # Input feature-map reordering
            if self.reorder == True:
                # print("use activation reorder!")
                shape_input = a_para.shape[1]
                num_TN = int(shape_input / self.TN)
                remainder_TN = shape_input % self.TN
                first = True
                reorder_a_para = None
                if self.activate == 'linear':
                    print('layer-linear reorder!')
                    temp = a_para[:, 0:remainder_TN, :, :]
                    temp = temp.view(temp.shape[1], temp.shape[2], temp.shape[3])
                    temp = temp.permute(2, 1, 0).contiguous().view(-1)
                    if first:
                        reorder_a_para = temp.clone().cpu().data.numpy()
                        first = False
                    else:
                        reorder_a_para = np.append(reorder_a_para, temp.cpu().data.numpy())
                else:
                    for k in range(num_TN):
                        temp = a_para[:, k * self.TN:(k + 1) * self.TN, :, :]
                        temp = temp.view(temp.shape[1], temp.shape[2], temp.shape[3])
                        temp = temp.permute(2, 1, 0).contiguous().view(-1)
                        if first:
                            reorder_a_para = temp.clone().cpu().data.numpy()
                            first = False
                        else:
                            reorder_a_para = np.append(reorder_a_para, temp.cpu().data.numpy())

                a_para_flatten = reorder_a_para
                # Verify the correctness of the reordering:
                '''if a_para_flatten.size == a_para.shape[0] * a_para.shape[1] * a_para.shape[2] * a_para.shape[3]:
                    print("activation convert correctly!")
                else:
                    print("activation convert mismatchingly!")'''
                q_activation_reorder = a_para_flatten
                q_activation_reorder = np.array(q_activation_reorder).reshape(1, -1)
                np.savetxt(('./quantizer_output/q_activation_reorder/a_reorder_%s.txt' % self.name),
                           q_activation_reorder, delimiter='\n')
                # Save the reordered activations as a binary file
                activation_flat = q_activation_reorder.astype(np.int8)
                writer = open('./quantizer_output/q_activation_reorder/%s_activation_q_bin' % self.name, "wb")
                writer.write(activation_flat)
                writer.close()
            # End of feature-map reordering

            q_activation_txt = np.array(q_activation_txt.cpu()).reshape(1, -1)
            q_activation_max = [np.max(q_activation_txt)]  # layer max (to check for overflow)
            max_activation_count = [np.sum(abs(q_activation_txt) >= (1 << (self.w_bits - 1)) - 1)]  # count of overflowing activations
            # q_weight_max = np.argmax(q_weight_txt)
            np.savetxt(('./quantizer_output/max_activation_count/max_a_count_%s.txt' % self.name),
                       max_activation_count)
            np.savetxt(('./quantizer_output/q_activation_max/q_a_max_%s.txt' % self.name),
                       q_activation_max)
            np.savetxt(('./quantizer_output/q_activation_out/q_activation_%s.txt' % self.name),
                       q_activation_txt, delimiter='\n')

    output = self.activation_quantizer(output)
    return output
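# The dump logic above implies a signed symmetric quantizer whose integer
# grid saturates at +/-(2**(bits-1) - 1), which is what the overflow counts
# test. A minimal stand-in sketch (hypothetical: get_scale and
# get_quantize_value mirror the calls above, not the project's real class):
import torch

class SymmetricQuantizer:
    def __init__(self, bits=8):
        self.bits = bits
        self.scale = None  # set on the first quantization call

    def get_scale(self):
        return self.scale.cpu().numpy().reshape(-1)

    def get_quantize_value(self, x):
        qmax = (1 << (self.bits - 1)) - 1        # e.g. 127 for 8 bits
        self.scale = x.abs().max() / qmax        # per-tensor scale
        return torch.clamp(torch.round(x / self.scale), -qmax - 1, qmax)

    def __call__(self, x):
        # Fake-quantize: round onto the integer grid, then dequantize.
        return self.get_quantize_value(x) * self.scale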