def test_raise_exception_bitwidth(self):
    """Raise an exception if an input value is larger than bitwidth."""
    packer = Packer(2, 32)
    test_input = np.zeros([64], dtype=np.float32)
    test_input[0:-1:2] = 1
    test_input[0:-1:4] = 4

    with self.assertRaises(ValueError):
        packer.run(test_input)
def test_raise_exception_wordsize(self):
    """Raise an exception if the input size is not a multiple of the word size."""
    packer = Packer(2, 32)
    test_input = np.zeros([83], dtype=np.float32)
    test_input[0:-1:2] = 1
    test_input[0:-1:4] = 4

    with self.assertRaises(ValueError):
        packer.run(test_input)
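# A minimal sketch of the two checks the tests above exercise (an assumed
# model of Packer's validation, not its actual implementation; the helper
# name is hypothetical): every value must fit in `bitwidth` bits, and the
# flattened input length must be a multiple of `wordsize`.
import numpy as np

def validate_packer_input(data: np.ndarray, bitwidth: int, wordsize: int) -> None:
    if data.max() > (1 << bitwidth) - 1:  # e.g. value 4 > 3 for bitwidth=2
        raise ValueError(f'value exceeds the {bitwidth} bit range')
    if data.size % wordsize != 0:  # e.g. 83 % 32 != 0
        raise ValueError(f'input size {data.size} is not a multiple of {wordsize}')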
def test_bw1_dividable_by_wordsize(self):
    """Test for when the input tensor size is divisible by the word size (1 bit version)."""
    packer = Packer(1, 32)
    test_input = np.zeros([32], dtype=np.float32)
    test_input[0:6] = [0, 1, 0, 1, 0, 1]

    test_output = packer.run(test_input)
    self.assertEqual(test_output[0], 42)
def test_bw1_not_dividable_by_wordsize(self):
    """Test for when the input tensor size is not divisible by the word size (1 bit version)."""
    packer = Packer(1, 37)
    test_input = np.zeros([37], dtype=np.float32)
    test_input[0::2] = 1

    test_output = packer.run(test_input)
    expected_output = [1431655765]
    np.testing.assert_array_equal(test_output[0], expected_output)
def test_bw2_dividable_by_wordsize(self):
    """Test for when the input tensor size is divisible by the word size (2 bit version)."""
    packer = Packer(2, 32)
    test_input = np.zeros([32], dtype=np.float32)
    test_input[0:6] = [0, 3, 0, 3, 0, 3]

    test_output = packer.run(test_input)
    expected_output = [42, 42]
    np.testing.assert_array_equal(test_output[0], expected_output)
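# Why the expected values above hold (a worked check, assuming bit i of each
# packed word comes from input element i, LSB first, and that 2-bit inputs are
# split into an LSB bit plane and an MSB bit plane):
#   [0, 1, 0, 1, 0, 1] -> bits 1, 3, 5 set -> 2 + 8 + 32 = 42
#   input[0::2] = 1    -> bits 0, 2, ..., 30 set -> 0x55555555 = 1431655765
#   value 3 = 0b11     -> both planes see [0, 1, 0, 1, 0, 1] -> [42, 42]
assert sum(1 << i for i in (1, 3, 5)) == 42
assert sum(1 << i for i in range(0, 32, 2)) == 0x55555555 == 1431655765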
def pass_lookup(graph: Graph) -> None:
    """Lookup.

    Parameters
    ----------
    graph : Graph
        The input graph. It will be modified in-place.
    """
    quantization_types = [
        'QTZ_binary_mean_scaling',
        'QTZ_linear_mid_tread_half',
        'QTZ_binary_channel_wise_mean_scaling'
    ]

    to_be_removed = []
    exec_list = [n for n in sort_graph(graph) if n.op_type in quantization_types]
    placeholder = [n for n in sort_graph(graph) if n.op_type == 'Input']

    for m in exec_list:
        quantizer = m

        # the pattern must be Gather -> Gather -> Reshape -> Reshape -> quantizer
        p1 = quantizer.input_nodes[0]
        if p1.op_type != 'Reshape':
            continue
        p2 = p1.input_nodes[0]
        if p2.op_type != 'Reshape':
            continue
        p3 = p2.input_nodes[0]
        if p3.op_type != 'Gather':
            continue
        p4 = p3.input_nodes[0]
        if p4.op_type != 'Gather':
            continue
        gather_params = p4.input_nodes[0]
        if gather_params.rank != 2 or gather_params.shape[0] != 256:
            continue

        params = gather_params.data
        data = {'data': params}
        qtz_data = quantizer.run(**data)['data']

        word_size = 32
        lu_bitwidth = quantizer.nbit
        packer = Packer(lu_bitwidth, word_size)

        lsb = np.zeros((256,), np.uint32)
        msb = np.zeros((256,), np.uint32)

        idx = 0
        for p in qtz_data:
            data = packer.run(p.astype(np.float32), p.shape).flatten()
            lsb[idx] = data[0]
            msb[idx] = data[1]
            idx += 1

        pe_lsb = Constant('pe_lsb_new', QUANTIZED_PACKED_KERNEL(), lsb,
                          dimension_format='TC', packed=True,
                          actual_shape=[256, word_size])
        pe_msb = Constant('pe_msb_new', QUANTIZED_PACKED_KERNEL(), msb,
                          dimension_format='TC', packed=True,
                          actual_shape=[256, word_size])

        n, h, w, c = quantizer.shape
        shape = [1, h, w, 2, word_size]
        pe = Lookup('Lookup', shape, QUANTIZED_PACKED(),
                    {'input': placeholder[0], 'lsb': pe_lsb, 'msb': pe_msb},
                    dimension_format='ChHWBCl')

        get_nodes_in_branch(quantizer, placeholder[0], to_be_removed)
        placeholder[0].remove_output('output')
        placeholder[0].add_output('output', pe)
        pe.add_outputs(quantizer.output_ops)

        output_op = quantizer.output_op_list[0]

        target_input_name = 'X'
        for input_name in output_op._input_names:
            if quantizer.equals(output_op._input_ops[input_name]):
                target_input_name = input_name
                break

        output_op.add_input(target_input_name, pe)

        graph.add_op(pe_lsb)
        graph.add_op(pe_msb)
        graph.add_op(pe)

    for op in to_be_removed:
        graph.remove_op(op)
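# The idea behind pass_lookup, in miniature: when the graph starts with Gather
# ops indexing a 256-row table, the quantizer can be applied to all 256
# possible rows ahead of time, and runtime quantization reduces to two table
# lookups per element (the packed LSB and MSB words). A sketch with
# hypothetical array names, not the pass's actual data structures:
import numpy as np

lut_lsb = np.zeros(256, np.uint32)  # packed LSB plane for each table row
lut_msb = np.zeros(256, np.uint32)  # packed MSB plane for each table row
pixels = np.array([0, 17, 255])     # raw uint8 input values

packed_lsb, packed_msb = lut_lsb[pixels], lut_msb[pixels]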
def pass_pack_weights(graph: Graph) -> None:
    """Given a quantized convolution node C, pack the weights of C into 32 bit words.

    If the node Q that applies quantization to the weights of C quantizes,
    for example, into 1 bit values, then one 32 bit word will contain 32 weights.

    Parameters
    ----------
    graph : Graph
        The input graph. It will be modified in-place.
    """
    exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv']
    quantization_types = [
        'QTZ_binary_mean_scaling',
        'QTZ_linear_mid_tread_half',
        'QTZ_binary_channel_wise_mean_scaling'
    ]

    word_size = 32
    weight_bitwidth = 1
    packer = Packer(weight_bitwidth, word_size)
    to_be_removed = []
    b = 32

    for m in exec_list:
        conv_node = m

        # check if this is a quantized convolution
        if not conv_node.quantizer or not conv_node.a_quantizer:
            continue

        # Check if we support this kind of quantizer
        weight_quantizer = conv_node.quantizer
        if weight_quantizer.op_type not in quantization_types:
            continue

        # Quantize the weights
        weight_quantizer.run_forward()

        def pad_to_multiple_of_b(tensor, axis, b):
            shape = list(tensor.shape)
            pad = (((shape[axis] + b - 1) // b) * b) - shape[axis]
            shape[axis] = pad
            return np.zeros(shape) if pad else None

        padded_data = np.copy(weight_quantizer.data)

        for axis in [0, 3]:
            pad_tensor = pad_to_multiple_of_b(padded_data, axis, b)
            if pad_tensor is not None:
                padded_data = np.append(padded_data, pad_tensor, axis=axis)

        tca_output = np.copy(padded_data)
        oc, kh, kw, kd = padded_data.shape[:]
        padded_data = padded_data.flatten()
        tca_output = tca_output.flatten()

        out_index = 0
        for g in range(oc // b):
            for p in range(kd // b):
                for h in range(kh):
                    for w in range(kw):
                        for o in range(b):
                            for d in range(b):
                                idx = g * (kw * kh * kd * b) + p * b + h * (kw * kd) \
                                    + w * kd + o * (kw * kh * kd) + d
                                tca_output[out_index] = padded_data[idx]
                                out_index += 1

        kn2row_output = np.zeros(oc * kh * kw * kd)
        out_index = 0
        for h in range(kh):
            for w in range(kw):
                for o in range(oc):
                    for i in range(kd):
                        idx = o * kh * kw * kd + h * kw * kd + w * kd + i
                        kn2row_output[out_index] = padded_data[idx]
                        out_index += 1

        op_data = weight_quantizer.binarizer(padded_data)
        data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension)
        tca_binarized_data = weight_quantizer.binarizer(tca_output)
        tca_packed_data = packer.run(tca_binarized_data.astype(np.float32),
                                     weight_quantizer.dimension)
        kn2row_binarized_data = weight_quantizer.binarizer(kn2row_output)
        kn2row_data = packer.run(kn2row_binarized_data.astype(np.float32),
                                 weight_quantizer.dimension)

        shape = [oc, kh, kw, kd]
        tca_shape = [oc // b, kd // b, kh, kw, b, b]
        kn2row_shape = [kh, kw, oc, kd]

        # Create the new constant with the quantized weights
        quantized_constant = Constant(
            weight_quantizer.name + '_new',
            PackedUint32(),
            data=np.vectorize(lambda k: (~k) & ((0x1 << 32) - 1))(data),
            dimension_format="NHWC",
            transposed_dimension_format="OhIhHWOlIl",
            packed=True,
            actual_shape=shape,
            transposed_shape=tca_shape,
            transposed_data=[(~k) & ((0x1 << 32) - 1) for k in tca_packed_data.flatten()],
            kn2row_data=[k for k in kn2row_data.flatten()],
            kn2row_shape=kn2row_shape,
            kn2row_dimension_format="HWNC")

        # get nodes to be removed after being disconnected
        get_nodes_in_branch(weight_quantizer, None, to_be_removed)

        # Add the constant to the graph and connect the new constant
        graph.add_op(quantized_constant)
        quantized_constant.add_outputs(weight_quantizer.output_ops)
        for output_name, consumer_list in weight_quantizer.output_ops.items():
            for consumer_node in consumer_list:
                for input_name, input_node in consumer_node.input_ops.items():
                    if input_node == weight_quantizer:
                        consumer_node.add_input(input_name, quantized_constant)
                        break

    for op in to_be_removed:
        graph.remove_op(op)
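# The two nested loops above are pure index shuffles, which numpy can express
# as reshape/transpose. A self-contained sketch with b shrunk to 2 so the
# shapes stay small (the pass itself uses b = 32); the equivalence is derived
# from the flat-index formulas in the loops, so treat it as an illustration:
import numpy as np

b, oc, kh, kw, kd = 2, 4, 3, 3, 4  # oc and kd already multiples of b
padded = np.arange(oc * kh * kw * kd).reshape(oc, kh, kw, kd)

# TCA layout 'OhIhHWOlIl': [oc//b, kd//b, kh, kw, b, b]
tca = padded.reshape(oc // b, b, kh, kw, kd // b, b).transpose(0, 4, 2, 3, 1, 5)

# kn2row layout 'HWNC': [kh, kw, oc, kd]
kn2row = padded.transpose(1, 2, 0, 3)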
def run_forward_conv(self, node: Conv, **kwargs: Any) -> None:
    ops: List[Operator] = [node.input_ops[i] for i in node.input_names
                           if node.input_ops.get(i)]

    if self._hard_quantized and node in kwargs['qconv']:
        # data is to be packed
        ops_have_precomp_values = list(map(lambda x: self._has_precompute_value(x), ops))
        ops_are_prunable = list(map(lambda x: self._is_prunable(x), ops))

        # check which input node can be pruned
        if reduce(lambda x, y: x and y, ops_have_precomp_values):
            # all inputs have concrete values
            node.run_forward()
            self._precomp_dic[node.name] = True  # this node can be pruned
            quantizers = {op.name: self._quantizers[op.name] for op in ops
                          if self._quantizers.get(op.name)}
            if len(quantizers) > 1:
                raise ValueError(
                    f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.')
            self._quantizers[node.name] = list(quantizers.values())[0]
        else:
            # an input (must be weight) is to be quantized and packed
            self._precomp_dic[node.name] = False
            node.is_quantized = True
            packer = Packer(self._quantized_bitwidth, self._wordsize)
            quantizers = {op.name: self._quantizers[op.name] for op in ops
                          if self._quantizers.get(op.name)}
            if len(quantizers) > 1:
                raise ValueError(
                    f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.')
            node.quantizer = list(quantizers.values())[0]

            for key, op in zip(node.input_names, ops):
                if self._is_prunable(op):
                    shape = op.shape
                    op_data = node.quantizer.binarizer(op.data)
                    data = packer.run(op_data.astype(np.float32), op.dimension)
                    dtype = op.dtype
                    new_op = Constant(op.name + '_new', dtype, data,
                                      packed=True, actual_shape=shape)
                    node.add_input(key, new_op)
                    self._graph.add_op(new_op)
                    self._prune(op)
    else:
        self._precompute_or_prune_inputs(node)
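# A side note on the boolean fold in run_forward_conv: for a non-empty list,
# reduce(lambda x, y: x and y, xs) is equivalent to the builtin all(xs),
# which reads as "all inputs have precomputed values":
from functools import reduce

xs = [True, True, False]
assert reduce(lambda x, y: x and y, xs) == all(xs)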
def create_quantized_graph2(self, data1: np.ndarray, data2: np.ndarray,
                            data3: np.ndarray) -> Tuple[Graph, np.float32, np.float32]:
    graph = Graph()

    # input
    x = Input('placeholder', [1, 5, 5, 3], Float32())

    # constant and internal nodes
    scaling1, qdata1 = self.binary_mean_scaling(data1)
    w = Constant('weight', Float32(), qdata1 * scaling1)
    q = QTZ_binary_mean_scaling('qtz1', [3, 2, 2, 3], Float32(), {'input': w})

    # Conv
    conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w},
                 kernel_shape=[2, 2])

    s1 = Constant('aq_const1', Float32(), np.array(1))
    s2 = Constant('aq_const2', Float32(), np.array(2))
    aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(),
                                   {'X': conv1, 'Y': s1, 'Z': s2})

    from modules.packer import Packer
    packer = Packer(1, 32)

    scaling2, qdata2 = self.binary_mean_scaling(data2)
    w2 = Constant('weight2', Uint32(), packer.run(qdata2),
                  packed=True, actual_shape=[3, 2, 2, 3])
    q2 = QTZ_binary_mean_scaling('qtz2', [3, 2, 2, 3], Float32(), {'input': w2})
    q2.scaling_factor = scaling2

    conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': aq, 'W': w2},
                 kernel_shape=[2, 2], quantized=True)
    conv2.quantizer = q2

    scaling3, qdata3 = self.binary_mean_scaling(data3)
    w3 = Constant('weight3', Uint32(), packer.run(qdata3),
                  packed=True, actual_shape=[3, 2, 2, 3])
    q3 = QTZ_binary_mean_scaling('qtz3', [3, 2, 2, 3], Float32(), {'input': w3})
    q3.scaling_factor = scaling3

    conv3 = Conv('conv3', [1, 3, 3, 3], Float32(), {'X': aq, 'W': w3},
                 kernel_shape=[2, 2], quantized=True)
    conv3.quantizer = q3

    y1 = Output('output1', [1, 3, 3, 3], Float32(), {'input': conv2})
    y2 = Output('output2', [1, 3, 3, 3], Float32(), {'input': conv3})

    # add ops to the graph
    graph.add_op_and_inputs(y1)
    graph.add_op_and_inputs(y2)

    return graph, scaling2, scaling3
def create_quantized_graph(self, data: np.ndarray, data2: np.ndarray, data3: np.ndarray) \
        -> Tuple[Graph, np.float32, np.float32]:
    graph = Graph()

    # input
    x = Input('placeholder', [1, 5, 5, 3], Float32())

    from modules.packer import Packer
    packer = Packer(1, 32)

    data = data.transpose([3, 2, 1, 0])
    scaling, qdata = self.binary_mean_scaling(data)
    shape = list(data.shape)

    w = Constant('weight', Float32(), qdata * scaling)
    q = QTZ_binary_mean_scaling('qtz1', shape, Float32(), {'input': w})
    q.scaling_factor = scaling

    # Conv
    conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {'X': x, 'W': w},
                 kernel_shape=[2, 2])

    s1 = Constant('aq_const1', Float32(), np.array(1))
    s2 = Constant('aq_const2', Float32(), np.array(2))
    aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(),
                                   {'X': conv1, 'Y': s1, 'Z': s2})
    dummy = Transpose('dummy', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(),
                      {'data': aq}, perm=[0, 1, 2, 3])

    scaling2, qdata2 = self.binary_mean_scaling(data2)
    w2 = Constant('weight2', Uint32(), packer.run(qdata2),
                  packed=True, actual_shape=[3, 2, 2, 3])
    # quantizer connected to conv2 as 'conv2.quantizer'
    q2 = QTZ_binary_mean_scaling('qtz2', [3, 2, 2, 3], Uint32(), {'input': w2})
    q2.scaling_factor = scaling2

    conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {'X': dummy, 'W': w2},
                 kernel_shape=[2, 2], quantized=True)
    conv2.quantizer = q2

    s3 = Constant('aq_const1', Float32(), np.array(1))
    s4 = Constant('aq_const2', Float32(), np.array(2))
    aq2 = QTZ_linear_mid_tread_half('aqtz2', [1, 3, 3, 3], Float32(),
                                    {'X': conv2, 'Y': s3, 'Z': s4})

    w3 = Constant('weight3', Float32(), data3)
    conv3 = Conv('conv3', [1, 2, 2, 3], Float32(), {'X': aq2, 'W': w3},
                 kernel_shape=[2, 2])

    # One output
    y = Output('output', [1, 2, 2, 3], Float32(), {'input': conv3})

    # add ops to the graph
    graph.add_op_and_inputs(y)

    return graph, scaling, scaling2
def pass_pack_weights(graph: Graph) -> None:
    """Given a quantized convolution node C, pack the weights of C into 32 bit words.

    If the node Q that applies quantization to the weights of C quantizes,
    for example, into 1 bit values, then one 32 bit word will contain 32 weights.

    Parameters
    ----------
    graph : Graph
        The input graph. It will be modified in-place.
    """
    exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv']
    quantization_types = [
        'QTZ_binary_mean_scaling',
        'QTZ_linear_mid_tread_half',
        'QTZ_binary_channel_wise_mean_scaling'
    ]

    word_size = 32
    weight_bitwidth = 1
    packer = Packer(weight_bitwidth, word_size)
    to_be_removed = []

    for m in exec_list:
        conv_node = m

        # check if this is a quantized convolution
        if not conv_node.quantizer or not conv_node.a_quantizer:
            continue

        # Check if we support this kind of quantizer
        weight_quantizer = conv_node.quantizer
        if weight_quantizer.op_type not in quantization_types:
            continue

        # Quantize the weights
        weight_quantizer.run_forward()
        op_data = weight_quantizer.binarizer(weight_quantizer.data)
        data = packer.run(op_data.astype(np.float32), weight_quantizer.dimension)

        # Create the new constant with the quantized weights
        oh = conv_node.height
        ow = conv_node.width
        od = conv_node.channel
        kh = conv_node.kernel_height
        kw = conv_node.kernel_width
        kd = conv_node.input_ops['X'].channel
        quantized_constant = Constant(
            weight_quantizer.name + '_new',
            Uint32(),
            data,
            packed=True,
            actual_shape=weight_quantizer.shape,
            transposed_data=_transpose_kernels(data, oh, ow, od, kh, kw, kd))

        # get nodes to be removed after being disconnected
        get_nodes_in_branch(weight_quantizer, None, to_be_removed)

        # Add the constant to the graph and connect the new constant
        graph.add_op(quantized_constant)
        quantized_constant.add_outputs(weight_quantizer.output_ops)
        for output_name, consumer_list in weight_quantizer.output_ops.items():
            for consumer_node in consumer_list:
                for input_name, input_node in consumer_node.input_ops.items():
                    if input_node == weight_quantizer:
                        consumer_node.add_input(input_name, quantized_constant)
                        break

    for op in to_be_removed:
        graph.remove_op(op)
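# A minimal illustration of the docstring's claim, assuming Packer packs bit i
# of each 32-bit word from element i of the flattened input (as the Packer
# tests above suggest): 32 one-bit weights collapse into a single uint32 word.
import numpy as np
from modules.packer import Packer  # import path as used in the test graphs above

binarized_weights = np.ones(32, dtype=np.float32)  # 32 weights, 1 bit each
packed = Packer(1, 32).run(binarized_weights)
# expected: one word with all 32 bits set, i.e. 0xFFFFFFFF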