def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph:
    """Assemble a small two-convolution graph with quantizers on the second conv.

    Data path: ``placeholder -> conv1 -> aqtz1 -> conv2 -> output``. The
    kernel of ``conv2`` is ``weight2`` passed through ``kqtz1``.

    Args:
        data1: Array stored in the ``weight1`` constant (kernel of ``conv1``).
        data2: Array stored in the ``weight2`` constant (input of ``kqtz1``).

    Returns:
        The graph after ``add_op_and_inputs`` has been called on the
        ``output`` node.
    """
    graph = Graph()

    # Graph entry point.
    placeholder = Input('placeholder', [1, 5, 5, 3], Float32())

    # First convolution, fed directly by the placeholder.
    weight1 = Constant('weight1', Float32(), data1)
    conv1_inputs = {'X': placeholder, 'W': weight1}
    conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), conv1_inputs,
                 kernel_shape=[2, 2])

    # Activation quantizer applied to the result of conv1.
    aq_y = Constant('aq_const1', Float32(), np.array(1))
    aq_z = Constant('aq_const2', Float32(), np.array(2))
    aq_inputs = {'X': conv1, 'Y': aq_y, 'Z': aq_z}
    act_quantizer = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(),
                                              aq_inputs)

    # Second convolution: quantized activations and quantized kernel.
    weight2 = Constant('weight2', Float32(), data2)
    kernel_quantizer = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(),
                                               {'input': weight2})
    conv2_inputs = {'X': act_quantizer, 'W': kernel_quantizer}
    conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), conv2_inputs,
                 kernel_shape=[2, 2])
    conv2.a_quantizer = [act_quantizer]
    conv2.quantizer = kernel_quantizer

    # Single output node.
    output = Output('output', [1, 3, 3, 3], Float32(), {'input': conv2})

    # Register the output together with the operators it depends on.
    graph.add_op_and_inputs(output)

    return graph
def create_sample_graph(data1: np.ndarray, data2: np.ndarray) -> Graph:
    """Assemble a sample graph: conv, quantized conv, batch norm, quantizer.

    Data path:
    ``placeholder -> conv1 -> aqtz1 -> conv2 -> bn -> aqtz2 -> output``.
    The kernel of ``conv2`` is ``weight2`` passed through ``kqtz1``, and
    ``conv2`` is flagged with ``is_quantized = True``. The four batch
    normalization parameters are drawn with ``np.random.rand(3)``.

    Args:
        data1: Array stored in the ``weight1`` constant (kernel of ``conv1``).
        data2: Array stored in the ``weight2`` constant (input of ``kqtz1``).

    Returns:
        The graph after ``add_op_and_inputs`` has been called on the
        ``output`` node.
    """
    graph = Graph()

    # Graph entry point.
    placeholder = Input('placeholder', [1, 5, 5, 3], Float32())

    # First convolution, fed directly by the placeholder.
    weight1 = Constant('weight1', Float32(), data1)
    conv1_inputs = {'X': placeholder, 'W': weight1}
    conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), conv1_inputs,
                 kernel_shape=[2, 2])

    # First activation quantizer, applied to the result of conv1.
    aq1_y = Constant('aq_const1', Int32(), np.array([2], dtype=np.int32))
    aq1_z = Constant('aq_const2', Float32(), np.array([2.0], dtype=np.float32))
    aq1_inputs = {'X': conv1, 'Y': aq1_y, 'Z': aq1_z}
    aq1 = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], Float32(),
                                    aq1_inputs)

    # Second convolution: quantized activations and quantized kernel.
    weight2 = Constant('weight2', Float32(), data2)
    kernel_quantizer = QTZ_binary_mean_scaling('kqtz1', [1, 2, 2, 3], Float32(),
                                               {'input': weight2})
    conv2_inputs = {'X': aq1, 'W': kernel_quantizer}
    conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), conv2_inputs,
                 kernel_shape=[2, 2])
    conv2.a_quantizer = [aq1]
    conv2.quantizer = kernel_quantizer
    conv2.is_quantized = True

    # Batch normalization; parameters are drawn in the order
    # scale, B, mean, var.
    bn_scale = Constant('bn_scale', Float32(), np.random.rand(3))
    bn_bias = Constant('bn_b', Float32(), np.random.rand(3))
    bn_mean = Constant('bn_mu', Float32(), np.random.rand(3))
    bn_var = Constant('bn_var', Float32(), np.random.rand(3))
    bn_inputs = {
        'X': conv2,
        'scale': bn_scale,
        'B': bn_bias,
        'mean': bn_mean,
        'var': bn_var,
    }
    bn = BatchNormalization('bn', [1, 3, 3, 3], Float32(), bn_inputs)

    # Second activation quantizer, applied to the batch-norm result.
    aq2_y = Constant('aq_const3', Int32(), np.array([2], dtype=np.int32))
    aq2_z = Constant('aq_const4', Float32(), np.array([2.0], dtype=np.float32))
    aq2_inputs = {'X': bn, 'Y': aq2_y, 'Z': aq2_z}
    aq2 = QTZ_linear_mid_tread_half('aqtz2', [1, 3, 3, 3], Float32(),
                                    aq2_inputs)

    # Single output node.
    output = Output('output', [1, 3, 3, 3], Float32(), {'input': aq2})

    # Register the output together with the operators it depends on.
    graph.add_op_and_inputs(output)

    return graph
def run_forward_conv(self, node: Conv, **kwargs: Any) -> None:
    """Process a convolution node during the forward traversal.

    If hard quantization is enabled (``self._hard_quantized``) and ``node``
    is contained in ``kwargs['qconv']``, one of two things happens:

    * Every present input already has a precomputed value: the node is
      evaluated with ``node.run_forward()``, flagged ``True`` in
      ``self._precomp_dic`` and the quantizer found on its inputs is
      recorded for the node in ``self._quantizers``.
    * Otherwise: the node is flagged ``False`` in ``self._precomp_dic``,
      marked ``is_quantized``, given the quantizer found on its inputs, and
      every prunable input is binarized with that quantizer, packed, and
      replaced by a new packed ``Constant`` named ``<input name>_new``.

    Any other node is handed to ``self._precompute_or_prune_inputs``.

    Args:
        node: The convolution operator being visited.
        **kwargs: ``kwargs['qconv']`` is read when hard quantization is
            enabled; it must support ``in`` tests against ``node``.

    Raises:
        ValueError: If more than one input of ``node`` has a registered
            quantizer.
    """
    # Keep each input name together with its operator. Inputs that are not
    # connected are skipped, so pairing names and operators positionally
    # afterwards could attach an operator to the wrong key.
    named_ops = [
        (name, node.input_ops[name])
        for name in node.input_names
        if node.input_ops.get(name)
    ]
    ops: List[Operator] = [op for _, op in named_ops]

    if not (self._hard_quantized and node in kwargs['qconv']):
        self._precompute_or_prune_inputs(node)
        return

    def sole_input_quantizer():
        """Return the single quantizer registered for the node's inputs."""
        quantizers = {
            op.name: self._quantizers[op.name]
            for op in ops
            if self._quantizers.get(op.name)
        }
        if len(quantizers) > 1:
            raise ValueError(
                f'{node.name}: multiple quantized inputs with {node.op_type} are not supported.'
            )
        return list(quantizers.values())[0]

    # Query every input (no short-circuit) before deciding which way to go.
    ops_have_precomp_values = [self._has_precompute_value(op) for op in ops]

    if all(ops_have_precomp_values):
        # All inputs have concrete values, so the node itself can be pruned.
        node.run_forward()
        self._precomp_dic[node.name] = True
        self._quantizers[node.name] = sole_input_quantizer()
        return

    # An input (must be the weight) is to be quantized and packed.
    self._precomp_dic[node.name] = False
    node.is_quantized = True
    packer = Packer(self._quantized_bitwidth, self._wordsize)
    node.quantizer = sole_input_quantizer()

    for key, op in named_ops:
        if not self._is_prunable(op):
            continue

        shape = op.shape
        op_data = node.quantizer.binarizer(op.data)
        data = packer.run(op_data.astype(np.float32), op.dimension)
        dtype = op.dtype

        # Swap the original input for its packed counterpart, remembering
        # the unpacked shape on the new constant.
        new_op = Constant(op.name + '_new',
                          dtype,
                          data,
                          packed=True,
                          actual_shape=shape)
        node.add_input(key, new_op)
        self._graph.add_op(new_op)
        self._prune(op)
def create_quantized_graph2(self, data1: np.ndarray, data2: np.ndarray,
                            data3: np.ndarray) -> 'Tuple[Graph, np.float32, np.float32]':
    """Build a graph where one activation quantizer feeds two quantized convs.

    Data path: ``placeholder -> conv1 -> aqtz1``, then ``aqtz1`` feeds both
    ``conv2 -> output1`` and ``conv3 -> output2``. The kernels of ``conv2``
    and ``conv3`` are the binarized ``data2`` / ``data3`` packed with
    ``Packer(1, 32)``; each conv gets its own ``QTZ_binary_mean_scaling``
    quantizer carrying the matching scaling factor.

    Args:
        data1: Kernel data for ``conv1``; stored as ``qdata1 * scaling1``.
        data2: Kernel data for ``conv2``; stored binarized and packed.
        data3: Kernel data for ``conv3``; stored binarized and packed.

    Returns:
        ``(graph, scaling2, scaling3)`` where the scaling factors are the
        ones returned by ``self.binary_mean_scaling`` for ``data2`` and
        ``data3``. NOTE(review): the scalar type is annotated as
        ``np.float32`` to mirror the sibling ``create_quantized_graph``;
        confirm against ``binary_mean_scaling``.
    """
    graph = Graph()

    # input
    x = Input(
        'placeholder',
        [1, 5, 5, 3],
        Float32(),
    )

    # constant and internal nodes
    scaling1, qdata1 = self.binary_mean_scaling(data1)
    w = Constant('weight', Float32(), qdata1 * scaling1)

    # Not referenced again below, but kept: it is constructed with ``w`` as
    # its input.
    q = QTZ_binary_mean_scaling('qtz1', [3, 2, 2, 3], Float32(), {'input': w})

    # Conv
    conv1 = Conv('conv1', [1, 4, 4, 3], Float32(), {
        'X': x,
        'W': w
    }, kernel_shape=[2, 2])

    s1 = Constant('aq_const1', Float32(), np.array(1))
    s2 = Constant('aq_const2', Float32(), np.array(2))

    aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(), {
        'X': conv1,
        'Y': s1,
        'Z': s2
    })

    from modules.packer import Packer
    packer = Packer(1, 32)

    # First quantized branch.
    scaling2, qdata2 = self.binary_mean_scaling(data2)
    w2 = Constant('weight2', Uint32(), packer.run(qdata2), packed=True,
                  actual_shape=[3, 2, 2, 3])

    q2 = QTZ_binary_mean_scaling('qtz2', [3, 2, 2, 3], Float32(), {'input': w2})
    q2.scaling_factor = scaling2

    conv2 = Conv(
        'conv2',
        [1, 3, 3, 3],
        Float32(),
        {
            'X': aq,
            'W': w2
        },
        kernel_shape=[2, 2],
        quantized=True,
    )
    conv2.quantizer = q2

    # Second quantized branch. The constant gets its own name so that it
    # does not share 'weight2' with the kernel of conv2.
    scaling3, qdata3 = self.binary_mean_scaling(data3)
    w3 = Constant('weight3', Uint32(), packer.run(qdata3), packed=True,
                  actual_shape=[3, 2, 2, 3])

    q3 = QTZ_binary_mean_scaling('qtz3', [3, 2, 2, 3], Float32(), {'input': w3})
    q3.scaling_factor = scaling3

    conv3 = Conv('conv3', [1, 3, 3, 3], Float32(), {
        'X': aq,
        'W': w3
    }, kernel_shape=[2, 2], quantized=True)
    conv3.quantizer = q3

    y1 = Output('output1', [1, 3, 3, 3], Float32(), {'input': conv2})
    y2 = Output('output2', [1, 3, 3, 3], Float32(), {'input': conv3})

    # add ops to the graph
    graph.add_op_and_inputs(y1)
    graph.add_op_and_inputs(y2)

    return graph, scaling2, scaling3
def create_quantized_graph(self, data: np.ndarray, data2: np.ndarray, data3: np.ndarray) \
        -> Tuple[Graph, np.float32, np.float32]:
    """Build a three-convolution graph whose middle convolution is quantized.

    Data path: ``placeholder -> conv1 -> aqtz1 -> dummy (Transpose with
    perm [0, 1, 2, 3]) -> conv2 -> aqtz2 -> conv3 -> output``. The kernel of
    ``conv2`` is the binarized ``data2`` packed with ``Packer(1, 32)``, and
    ``conv2`` carries ``qtz2`` as its ``quantizer``.

    Args:
        data: Kernel data for ``conv1``. It is transposed with
            ``[3, 2, 1, 0]`` and stored as ``qdata * scaling``.
        data2: Kernel data for ``conv2``; stored binarized and packed.
        data3: Kernel data for ``conv3``; stored unchanged.

    Returns:
        ``(graph, scaling, scaling2)`` where the scaling factors are the
        ones returned by ``self.binary_mean_scaling`` for the transposed
        ``data`` and for ``data2``.
    """
    graph = Graph()

    # single graph input
    x = Input(
        'placeholder',
        [1, 5, 5, 3],
        Float32(),
    )

    from modules.packer import Packer
    packer = Packer(1, 32)

    data = data.transpose([3, 2, 1, 0])
    scaling, qdata = self.binary_mean_scaling(data)
    shape = list(data.shape)
    w = Constant(
        'weight', Float32(), qdata * scaling,
    )

    # Not referenced again below, but kept: it is constructed with ``w`` as
    # its input.
    q = QTZ_binary_mean_scaling('qtz1', shape, Float32(), {'input': w})
    q.scaling_factor = scaling

    # Conv
    conv1 = Conv(
        'conv1',
        [1, 4, 4, 3],
        Float32(),
        {
            'X': x,
            'W': w
        },
        kernel_shape=[2, 2],
    )

    s1 = Constant('aq_const1', Float32(), np.array(1))
    s2 = Constant('aq_const2', Float32(), np.array(2))

    aq = QTZ_linear_mid_tread_half('aqtz1', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(), {
        'X': conv1,
        'Y': s1,
        'Z': s2
    })

    dummy = Transpose('dummy', [1, 4, 4, 3], QUANTIZED_NOT_PACKED(), {'data': aq},
                      perm=[0, 1, 2, 3])

    scaling2, qdata2 = self.binary_mean_scaling(data2)
    w2 = Constant('weight2', Uint32(), packer.run(qdata2), packed=True,
                  actual_shape=[3, 2, 2, 3])

    # quantizer connected to conv2 as 'conv2.quantizer'
    q2 = QTZ_binary_mean_scaling('qtz2', [3, 2, 2, 3], Uint32(), {'input': w2})
    q2.scaling_factor = scaling2

    conv2 = Conv('conv2', [1, 3, 3, 3], Float32(), {
        'X': dummy,
        'W': w2
    }, kernel_shape=[2, 2], quantized=True)
    conv2.quantizer = q2

    # The constants of the second activation quantizer get their own names
    # so that they do not share 'aq_const1' / 'aq_const2' with s1 / s2.
    s3 = Constant('aq_const3', Float32(), np.array(1))
    s4 = Constant('aq_const4', Float32(), np.array(2))

    aq2 = QTZ_linear_mid_tread_half('aqtz2', [1, 3, 3, 3], Float32(), {
        'X': conv2,
        'Y': s3,
        'Z': s4
    })

    w3 = Constant('weight3', Float32(), data3)

    conv3 = Conv('conv3', [1, 2, 2, 3], Float32(), {
        'X': aq2,
        'W': w3
    }, kernel_shape=[2, 2])

    # One output
    y = Output('output', [1, 2, 2, 3], Float32(), {'input': conv3})

    # add ops to the graph
    graph.add_op_and_inputs(y)

    return graph, scaling, scaling2