def quantize_fusion(self, G: NNGraph, node: ConvFusionParameters,
                    in_qs, force_out=None) -> SymmetricQuantizationRecord:
    """Quantize a fused node.

    For a 'conv_active' fusion the contained convolution is quantized with its
    output left at accumulator precision (`out_as_acc=True`) and the contained
    activation is quantized to feed from that accumulator. Any other fusion
    type is delegated to `default_quantize_fusion`.

    Args:
        G: the graph being quantized (passed through to the default handler).
        node: the fusion node whose contained nodes are quantized.
        in_qs: quantization of the fusion's inputs; `in_qs[0]` feeds the conv.
        force_out: optional forced output format with `.bits` and `.q` fields.

    Returns:
        For 'conv_active': a tuple of (fusion-level SymmetricQuantizationRecord,
        OrderedDict mapping NodeId -> per-contained-node record). Otherwise
        whatever `default_quantize_fusion` returns.

    Raises:
        NotImplementedError: if the forced q position would require a left
            shift in the kernel, which is not supported.
    """
    if node.fusion_type == 'conv_active':
        result = OrderedDict()
        nodes = node.contained_nodes()
        conv_node = nodes[0]
        conv_astats = self._activation_stats.get(NodeId(node, conv_node))
        # Keep the conv output at accumulator precision; the activation
        # consumes the accumulator directly.
        conv_qrec = self.calculate_filter_q(conv_node,
                                            conv_astats,
                                            in_q=in_qs[0],
                                            force_width=self._force_width,
                                            out_as_acc=True)
        result[NodeId(node, conv_node)] = conv_qrec

        act_node = nodes[1]
        act_astats = self._activation_stats.get(NodeId(node, act_node))
        if force_out and force_out.bits:
            act_max_q = self.compute_activation_out_maxq(act_node, force_out.bits)
            if force_out.q is not None:
                if (act_max_q is not None and force_out.q > act_max_q) or \
                        force_out.q > conv_qrec.out_qs[0].q:
                    # We cannot shift left in the kernel
                    # TODO - This should try to increase the input q and perhaps the width
                    # Unlikely to happen
                    raise NotImplementedError()
                act_o_q = QType(bits=force_out.bits, q=force_out.q, signed=True)
            else:
                # BUG FIX: act_o_q was read before assignment here. Build it
                # from the recorded output range at the forced width first,
                # then clamp to the activation's maximum representable q.
                act_o_q = QType.from_min_max(
                    max_val=act_astats['range_out'][0]['max'],
                    min_val=act_astats['range_out'][0]['min'],
                    bits=force_out.bits)
                if act_max_q is not None:
                    act_o_q.q = min(act_max_q, act_o_q.q)
        else:
            act_o_q = QType.from_min_max(
                max_val=act_astats['range_out'][0]['max'],
                min_val=act_astats['range_out'][0]['min'],
                bits=self._force_width)
            # Never ask for more fractional bits than the conv produces.
            act_o_q.q = min(act_o_q.q, conv_qrec.out_qs[0].q)
            if force_out and force_out.q:
                # BUG FIX: act_max_q is not computable in this branch (no
                # forced width), so only the conv output q bounds the check.
                if force_out.q > conv_qrec.out_qs[0].q:
                    # We cannot shift left in the kernel
                    # TODO - This should try to increase the input q and perhaps the width
                    # Unlikely to happen
                    raise NotImplementedError()
                act_o_q.q = force_out.q

        act_qrec = SymmetricQuantizationRecord(in_qs=conv_qrec.out_qs, out_qs=[act_o_q])
        result[NodeId(node, act_node)] = act_qrec
        return SymmetricQuantizationRecord(in_qs=in_qs, out_qs=act_qrec.out_qs), result
    else:
        return self.default_quantize_fusion(G, node, in_qs, force_out=force_out)
def calculate_output_q(self, node: Parameters, astats, in_qs,
                       force_width=None, force_out=None):
    """Calculate the output quantization record for a node.

    The output QType is derived from the recorded output range in `astats`
    unless `force_out` pins the width (`bits`) and/or q position (`q`).

    Args:
        node: unused (deleted immediately); kept for interface compatibility.
        astats: activation statistics; reads `astats['range_out'][0]` min/max.
        in_qs: input quantization, stored unchanged in the returned record.
        force_width: default width in bits when no width is forced.
        force_out: optional forced output format with `.bits` and `.q` fields.

    Returns:
        SymmetricQuantizationRecord with the computed output QType.
    """
    del node
    if force_out:
        if force_out.bits:
            if force_out.q:
                # Both width and q fully specified.
                o_q = QType(bits=force_out.bits, q=force_out.q, signed=True)
            else:
                # Width forced, q derived from the observed range.
                o_q = QType.from_min_max(
                    max_val=astats['range_out'][0]['max'],
                    min_val=astats['range_out'][0]['min'],
                    bits=force_out.bits)
        elif force_out.q:
            # q forced, width from force_width.
            o_q = QType.from_min_max(max_val=astats['range_out'][0]['max'],
                                     min_val=astats['range_out'][0]['min'],
                                     bits=force_width)
            o_q.q = force_out.q
        else:
            # BUG FIX: a truthy force_out with neither bits nor q set
            # previously left o_q unbound; fall back to the default
            # computation.
            o_q = QType.from_min_max(max_val=astats['range_out'][0]['max'],
                                     min_val=astats['range_out'][0]['min'],
                                     bits=force_width)
    else:
        o_q = QType.from_min_max(max_val=astats['range_out'][0]['max'],
                                 min_val=astats['range_out'][0]['min'],
                                 bits=force_width)
    return SymmetricQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
def calculate_output_q(self, node: Parameters, astats, in_qs,
                       min_qsnr=None, force_width=None, force_out=None):
    """Calculate the output quantization record for a node.

    The output QType comes from `self.get_quantization` (driven by the
    statistics, `min_qsnr` and `force_width`) unless `force_out` pins the
    width (`bits`) and/or q position (`q`).

    Args:
        node: unused (deleted immediately); kept for interface compatibility.
        astats: activation statistics passed to `get_quantization`.
        in_qs: input quantization, stored unchanged in the returned record.
        min_qsnr: minimum QSNR constraint forwarded to `get_quantization`.
        force_width: default width in bits when no width is forced.
        force_out: optional forced output format with `.bits` and `.q` fields.

    Returns:
        SymmetricQuantizationRecord with the computed output QType.
    """
    del node
    if force_out:
        if force_out.bits:
            if force_out.q:
                # Both width and q fully specified.
                o_q = QType(bits=force_out.bits, q=force_out.q, signed=True)
            else:
                # Width forced; q chosen from the statistics.
                o_q = self.get_quantization(astats, None, force_out.bits)
        elif force_out.q:
            # q forced; width chosen normally, then q overridden.
            o_q = self.get_quantization(astats, min_qsnr, force_width)
            o_q.q = force_out.q
        else:
            # BUG FIX: a truthy force_out with neither bits nor q set
            # previously left o_q unbound; fall back to the default
            # computation.
            o_q = self.get_quantization(astats, min_qsnr, force_width)
    else:
        o_q = self.get_quantization(astats, min_qsnr, force_width)
    return SymmetricQuantizationRecord(in_qs=in_qs, out_qs=[o_q])
def calculate_filter_q(self, node: Parameters, astats, in_q: QType,
                       force_width=None, force_out=None, out_as_acc=False):
    """Calculate the quantization record for a filter (conv/linear) node.

    Chooses weight, accumulator, bias, output and (optionally) multiplicative
    bias QTypes so the 32-bit accumulator has enough integer headroom for the
    recorded accumulator/output ranges, reducing weight precision if needed.

    Args:
        node: the filter node; reads `weights`, `biases`, `has_bias` and, for
            MultiplicativeBiasParameters, `has_mul_bias` / `mul_biases`.
        astats: statistics; reads `range_acc` and `range_out[0]` min/max.
        in_q: quantization of the node input.
        force_width: width in bits for weights/biases and the default output.
        force_out: optional forced output format with `.bits` and `.q` fields.
        out_as_acc: if True the output stays at accumulator precision.

    Returns:
        SymmetricScalableFilterQuantizationRecord for nodes with
        multiplicative biases, otherwise SymmetricFilterQuantizationRecord.

    Raises:
        NotImplementedError: if the forced output q would require a left
            shift in the kernel.
        AssertionError: if the weights cannot lose enough fractional bits to
            make room for the integer part of the accumulator.
    """
    w_q = QType.from_array(arr=node.weights, bits=force_width, signed=True)
    calc_width = 32
    calc_q = in_q.q + w_q.q

    acc_bits = bits(astats['range_acc']['max'], astats['range_acc']['min'])
    act_bits = bits(astats['range_out'][0]['max'], astats['range_out'][0]['min'])
    act_acc_bits = max(acc_bits, act_bits)

    calc_int_bits = calc_width - calc_q
    if calc_int_bits < act_acc_bits:
        # we don't have enough space for the integer portion so reduce the
        # precision of the weights
        missing_bits = act_acc_bits - calc_int_bits
        # TODO - This needs improving
        assert w_q.q >= missing_bits, "no space in weights to reduce precision"
        w_q.q = w_q.q - missing_bits
        calc_q = in_q.q + w_q.q
        calc_int_bits = calc_width - calc_q

    c_q = acc_q = QType(bits=calc_width, q=calc_q, signed=True)

    # BUG FIX: b_q was unbound for nodes without a bias; initialize to None
    # so the clamp below and the record construction are always defined.
    b_q = None
    if out_as_acc:
        o_q = c_q
        if node.has_bias:
            b_q = QType.from_array(node.biases, bits=force_width, signed=True)
    else:
        # The output size is requested to be force_out width
        if force_out and force_out.bits:
            # The output fixed point position is also forced
            if force_out.q:
                if (force_out.bits - force_out.q) < act_acc_bits:
                    # clipping so cannot completely satisfy
                    o_q = QType(bits=force_out.bits,
                                q=force_out.bits - act_acc_bits,
                                signed=True)
                else:
                    if force_out.q > calc_q:
                        # We cannot shift left in the kernel
                        # TODO - This should try to increase the input q
                        # Unlikely to happen
                        raise NotImplementedError()
                    # We can satisfy the force
                    o_q = QType(bits=force_out.bits, q=force_out.q, signed=True)
            else:
                # Only the width is forced
                o_q = QType.from_min_max(astats['range_out'][0]['max'],
                                         astats['range_out'][0]['min'],
                                         bits=force_out.bits)
        else:
            # The output width is not forced so calculate the output q normally
            o_q = QType.from_min_max(astats['range_out'][0]['max'],
                                     astats['range_out'][0]['min'],
                                     bits=force_width)
            if force_out and force_out.q:
                # The output fixed point position is forced
                if force_out.q > calc_q:
                    # We cannot shift left in the kernel
                    # TODO - This should try to increase the input q
                    # Unlikely to happen
                    raise NotImplementedError()
                o_q.q = force_out.q
        if node.has_bias:
            b_q = o_q

    # make sure that the biases are not stored more precisily than the
    # accumulator. It's pointless and will cause a negative shift
    if b_q is not None and b_q.q > acc_q.q:
        b_q.q = acc_q.q

    if isinstance(node, MultiplicativeBiasParameters):
        if node.has_mul_bias:
            mb_q = QType.from_array(node.mul_biases, bits=force_width)
        else:
            mb_q = None
        qrec = SymmetricScalableFilterQuantizationRecord(
            in_qs=[in_q], out_qs=[o_q], calc_q=c_q, acc_q=acc_q,
            biases_q=b_q, weights_q=w_q, mul_biases_q=mb_q,
            constants_are_quantized=False)
    else:
        qrec = SymmetricFilterQuantizationRecord(
            in_qs=[in_q], out_qs=[o_q], calc_q=c_q, acc_q=acc_q,
            biases_q=b_q, weights_q=w_q, constants_are_quantized=False)
    LOG.debug("filter %s qrec %s", node.name, qrec)
    return qrec