def add_node_to_graph_recursive(self, current: Any, graph: Graph, visited: Set[Any],
                                added: Dict[str, Operator], data_format: str,
                                nodes_to_remove) -> Operator:
    if current in visited:
        return added[current.name]

    added_op_dic: Dict[str, Operator] = {}

    current_format, input_formats = self._get_format(current, data_format)
    inputs = self.find_inputs(current)
    for in_put, in_format in zip(inputs, input_formats):
        in_op = self.add_node_to_graph_recursive(in_put, graph, visited, added,
                                                 in_format, nodes_to_remove)
        added_op_dic[in_op.name] = in_op

    op = self.create_new_op(current, added_op_dic, current_format,
                            input_formats, nodes_to_remove)
    graph.add_op(op)

    visited.add(current)
    added[op.name] = op
    return op
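# The memoised DFS above is easier to see in isolation. Below is a minimal,
# self-contained sketch of the same pattern on a toy node type; ToyNode and
# rebuild are illustrative names, not part of this module's API.
class ToyNode:
    def __init__(self, name, inputs=()):
        self.name = name
        self.inputs = list(inputs)

def rebuild(node, visited, added):
    # Rebuild inputs first, then the node itself; visited/added ensure a
    # shared subtree is copied exactly once even when reached twice.
    if node in visited:
        return added[node.name]
    new_inputs = [rebuild(i, visited, added) for i in node.inputs]
    copy = ToyNode(node.name, new_inputs)
    visited.add(node)
    added[node.name] = copy
    return copy

shared = ToyNode('shared')
root = ToyNode('root', [shared, shared])
copied = rebuild(root, set(), {})
assert copied.inputs[0] is copied.inputs[1]  # shared subtree copied once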
def pass_constant_folding(graph: Graph) -> None:
    """Given a node N, if the value of each input of N is known at compilation time
    then N will be executed. The node N and its inputs will be replaced with a
    Constant node which holds the computed output of N.

    Args:
        graph (Graph): The input graph. It will be modified in-place.

    """
    done = False
    processed_nodes = []
    while not done:
        exec_list = sort_graph(graph)
        processed_before_precompute = len(processed_nodes)
        to_be_removed = []

        for m in exec_list:
            if m in processed_nodes:
                continue

            # We want operators with inputs
            if not m.input_nodes:
                continue

            precomputable = True
            for input_node in m.input_nodes:
                if input_node.op_type != 'Constant':
                    precomputable = False
            if not precomputable:
                continue

            processed_nodes += m.input_nodes
            processed_nodes.append(m)

            data = m.run_forward()
            new_constant = Constant(m.name + '_new', m.dtype, data,
                                    dimension_format=m.dimension)
            graph.add_op(new_constant)

            # get nodes to be removed after being disconnected
            get_nodes_in_branch(m, None, to_be_removed)

            new_constant.add_outputs({'output': m.output_ops.values()})
            for output_name, consumer_list in m.output_ops.items():
                for consumer_node in consumer_list:
                    for input_name, input_node in consumer_node.input_ops.items():
                        if input_node == m:
                            consumer_node.add_input(input_name, new_constant)
                            break

        for op in to_be_removed:
            graph.remove_op(op)

        done = len(processed_nodes) == processed_before_precompute
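# For intuition, a stripped-down version of the folding step on a toy
# expression tree (hypothetical names, independent of the Graph/Operator API):
# any node whose inputs are all constants is executed at compile time and
# replaced by the resulting constant, recursively down to a fixed point.
import operator

def fold(expr):
    # expr is a number (a constant) or a tuple (fn, lhs, rhs)
    if not isinstance(expr, tuple):
        return expr
    fn, lhs, rhs = expr
    lhs, rhs = fold(lhs), fold(rhs)
    if not isinstance(lhs, tuple) and not isinstance(rhs, tuple):
        return fn(lhs, rhs)  # all inputs constant: execute now
    return (fn, lhs, rhs)    # keep the op, with folded children

# (add, 2, (mul, 3, 4)) folds down to the single constant 14
assert fold((operator.add, 2, (operator.mul, 3, 4))) == 14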
def test_graph_conv(self) -> None:
    """Test code for making a simple graph with Conv."""
    graph = Graph()

    # two inputs
    x = Input('input', [1, 5, 5, 3], Float32())
    w = Constant('weight', Float32(), np.zeros([1, 2, 2, 3]))

    # Conv
    conv = Conv('conv', [1, 4, 4, 3], Float32(),
                {'X': x, 'W': w},  # you can get these keys by 'Conv.input_names'
                kernel_shape=[2, 2])

    # One output
    y = Output(
        'output',
        [1, 4, 4, 3],
        Float32(),
        {'input': conv}  # you can get this key by 'Output.input_names'
    )

    # add ops to the graph
    graph.add_op(x)
    graph.add_op(w)
    graph.add_op(conv)
    graph.add_op(y)

    self.assertTrue(graph.check_nodes(),
                    "All inputs of operators must match their outputs.")
    print("Graph test passed!")
def pass_lookup(graph: Graph) -> None:
    """Lookup.

    Parameters
    ----------
    graph : Graph
        The input graph. It will be modified in-place.

    """
    quantization_types = [
        'QTZ_binary_mean_scaling',
        'QTZ_linear_mid_tread_half',
        'QTZ_binary_channel_wise_mean_scaling'
    ]

    to_be_removed = []
    exec_list = [n for n in sort_graph(graph) if n.op_type in quantization_types]
    placeholder = [n for n in sort_graph(graph) if n.op_type == 'Input']

    for m in exec_list:
        quantizer = m

        p1 = quantizer.input_nodes[0]
        if p1.op_type != 'Reshape':
            continue

        p2 = p1.input_nodes[0]
        if p2.op_type != 'Reshape':
            continue

        p3 = p2.input_nodes[0]
        if p3.op_type != 'Gather':
            continue

        p4 = p3.input_nodes[0]
        if p4.op_type != 'Gather':
            continue

        gather_params = p4.input_nodes[0]
        if gather_params.rank != 2 or gather_params.shape[0] != 256:
            continue

        params = gather_params.data
        data = {'data': params}
        qtz_data = quantizer.run(**data)['data']

        word_size = 32
        lu_bitwidth = quantizer.nbit
        packer = Packer(lu_bitwidth, word_size)

        lsb = np.zeros((256,), np.uint32)
        msb = np.zeros((256,), np.uint32)

        idx = 0
        for p in qtz_data:
            data = packer.run(p.astype(np.float32), p.shape).flatten()
            lsb[idx] = data[0]
            msb[idx] = data[1]
            idx += 1

        pe_lsb = Constant('pe_lsb_new', QUANTIZED_PACKED_KERNEL(), lsb,
                          dimension_format='TC', packed=True,
                          actual_shape=[256, word_size])
        pe_msb = Constant('pe_msb_new', QUANTIZED_PACKED_KERNEL(), msb,
                          dimension_format='TC', packed=True,
                          actual_shape=[256, word_size])

        n, h, w, c = quantizer.shape
        shape = [1, h, w, 2, word_size]
        pe = Lookup('Lookup', shape, QUANTIZED_PACKED(),
                    {'input': placeholder[0], 'lsb': pe_lsb, 'msb': pe_msb},
                    dimension_format='ChHWBCl')

        get_nodes_in_branch(quantizer, placeholder[0], to_be_removed)
        placeholder[0].remove_output('output')
        placeholder[0].add_output('output', pe)
        pe.add_outputs(quantizer.output_ops)

        output_op = quantizer.output_op_list[0]

        target_input_name = 'X'
        for input_name in output_op._input_names:
            if quantizer.equals(output_op._input_ops[input_name]):
                target_input_name = input_name
                break

        output_op.add_input(target_input_name, pe)

        graph.add_op(pe_lsb)
        graph.add_op(pe_msb)
        graph.add_op(pe)

    for op in to_be_removed:
        graph.remove_op(op)
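# Packer is used above to pack lu_bitwidth-bit values into 32-bit words. As a
# rough sketch of the 1-bit case (assumed behaviour; the real Packer lives
# elsewhere in this project, and its bit order may differ from the LSB-first
# order used here), 32 binary values become one uint32 word:
def pack_1bit_words(values, word_size=32):
    bits = (np.asarray(values).flatten() != 0).astype(np.uint64)
    pad = (-bits.size) % word_size  # zero-pad up to a whole word
    bits = np.append(bits, np.zeros(pad, np.uint64))
    weights = np.uint64(1) << np.arange(word_size, dtype=np.uint64)
    return (bits.reshape(-1, word_size) * weights).sum(axis=1).astype(np.uint32)

# e.g. 64 one-bits pack into two all-ones words
assert list(pack_1bit_words(np.ones(64))) == [0xFFFFFFFF, 0xFFFFFFFF]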
def pass_pack_weights(graph: Graph) -> None:
    """Given a quantized convolution node C, it will pack the weights of C into
    32 bit words. If the node Q that applies quantization to the weights of C
    quantizes, for example, into 1 bit values then one 32 bit word will contain
    32 weights.

    Parameters
    ----------
    graph : Graph
        The input graph. It will be modified in-place.

    """
    exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv']
    quantization_types = [
        'QTZ_binary_mean_scaling',
        'QTZ_linear_mid_tread_half',
        'QTZ_binary_channel_wise_mean_scaling'
    ]

    word_size = 32
    weight_bitwidth = 1
    packer = Packer(weight_bitwidth, word_size)
    to_be_removed = []
    b = 32

    for m in exec_list:
        conv_node = m

        # check if this is a quantized convolution
        if not conv_node.quantizer or not conv_node.a_quantizer:
            continue

        # Check if we support this kind of quantizer
        weight_quantizer = conv_node.quantizer
        if weight_quantizer.op_type not in quantization_types:
            continue

        # Quantize the weights
        weight_quantizer.run_forward()

        def pad_to_multiple_of_b(tensor, axis, b):
            shape = list(tensor.shape)
            pad = (((shape[axis] + b - 1) // b) * b) - shape[axis]
            shape[axis] = pad
            return np.zeros(shape) if pad else None

        padded_data = np.copy(weight_quantizer.data)

        # Pad the output-channel and depth axes up to a multiple of b
        for axis in [0, 3]:
            pad_tensor = pad_to_multiple_of_b(padded_data, axis, b)
            if pad_tensor is not None:
                padded_data = np.append(padded_data, pad_tensor, axis=axis)

        tca_output = np.copy(padded_data)
        oc, kh, kw, kd = padded_data.shape
        padded_data = padded_data.flatten()
        tca_output = tca_output.flatten()

        # Reorder the weights into the TCA layout (see tca_shape below)
        out_index = 0
        for g in range(oc // b):
            for p in range(kd // b):
                for h in range(kh):
                    for w in range(kw):
                        for o in range(b):
                            for d in range(b):
                                idx = (g * (kw * kh * kd * b) + p * b
                                       + h * (kw * kd) + w * kd
                                       + o * (kw * kh * kd) + d)
                                tca_output[out_index] = padded_data[idx]
                                out_index += 1

        # Reorder the weights into the kn2row layout (HWNC)
        kn2row_output = np.zeros(oc * kh * kw * kd)
        out_index = 0
        for h in range(kh):
            for w in range(kw):
                for o in range(oc):
                    for i in range(kd):
                        idx = o * kh * kw * kd + h * kw * kd + w * kd + i
                        kn2row_output[out_index] = padded_data[idx]
                        out_index += 1

        op_data = weight_quantizer.binarizer(padded_data)
        data = packer.run(op_data.astype(np.float32),
                          weight_quantizer.dimension)
        tca_binarized_data = weight_quantizer.binarizer(tca_output)
        tca_packed_data = packer.run(tca_binarized_data.astype(np.float32),
                                     weight_quantizer.dimension)
        kn2row_binarized_data = weight_quantizer.binarizer(kn2row_output)
        kn2row_data = packer.run(kn2row_binarized_data.astype(np.float32),
                                 weight_quantizer.dimension)

        shape = [oc, kh, kw, kd]
        tca_shape = [oc // b, kd // b, kh, kw, b, b]
        kn2row_shape = [kh, kw, oc, kd]

        # Create the new constant with the quantized weights
        quantized_constant = Constant(
            weight_quantizer.name + '_new',
            PackedUint32(),
            data=np.vectorize(lambda k: (~k) & ((0x1 << 32) - 1))(data),
            dimension_format="NHWC",
            transposed_dimension_format="OhIhHWOlIl",
            packed=True,
            actual_shape=shape,
            transposed_shape=tca_shape,
            transposed_data=[(~k) & ((0x1 << 32) - 1)
                             for k in tca_packed_data.flatten()],
            kn2row_data=[k for k in kn2row_data.flatten()],
            kn2row_shape=kn2row_shape,
            kn2row_dimension_format="HWNC")

        # get nodes to be removed after being disconnected
        get_nodes_in_branch(weight_quantizer, None, to_be_removed)

        # Add the constant to the graph and connect the new constant
        graph.add_op(quantized_constant)
        quantized_constant.add_outputs(weight_quantizer.output_ops)
        for output_name, consumer_list in weight_quantizer.output_ops.items():
            for consumer_node in consumer_list:
                for input_name, input_node in consumer_node.input_ops.items():
                    if input_node == weight_quantizer:
                        consumer_node.add_input(input_name, quantized_constant)
                        break

    for op in to_be_removed:
        graph.remove_op(op)
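# A standalone equivalent of the pad_to_multiple_of_b step above, for
# reference (hypothetical helper using np.pad instead of np.append):
def pad_axis_to_multiple(tensor, axis, b):
    pad = (-tensor.shape[axis]) % b
    if pad == 0:
        return tensor
    width = [(0, pad) if i == axis else (0, 0) for i in range(tensor.ndim)]
    return np.pad(tensor, width)

# e.g. a (10, 3, 3, 5) kernel padded along axes 0 and 3 with b == 32 becomes
# (32, 3, 3, 32); the appended weights are zeros, so their packed words are
# all-zero and harmless.
assert pad_axis_to_multiple(
    pad_axis_to_multiple(np.zeros((10, 3, 3, 5)), 0, 32), 3, 32
).shape == (32, 3, 3, 32)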
def pass_pack_weights(graph: Graph) -> None:
    """Given a quantized convolution node C, it will pack the weights of C into
    32 bit words. If the node Q that applies quantization to the weights of C
    quantizes, for example, into 1 bit values then one 32 bit word will contain
    32 weights.

    Parameters
    ----------
    graph : Graph
        The input graph. It will be modified in-place.

    """
    exec_list = [n for n in sort_graph(graph) if n.op_type == 'Conv']
    quantization_types = [
        'QTZ_binary_mean_scaling',
        'QTZ_linear_mid_tread_half',
        'QTZ_binary_channel_wise_mean_scaling'
    ]

    word_size = 32
    weight_bitwidth = 1
    packer = Packer(weight_bitwidth, word_size)
    to_be_removed = []

    for m in exec_list:
        conv_node = m

        # check if this is a quantized convolution
        if not conv_node.quantizer or not conv_node.a_quantizer:
            continue

        # Check if we support this kind of quantizer
        weight_quantizer = conv_node.quantizer
        if weight_quantizer.op_type not in quantization_types:
            continue

        # Quantize the weights
        weight_quantizer.run_forward()
        op_data = weight_quantizer.binarizer(weight_quantizer.data)
        data = packer.run(op_data.astype(np.float32),
                          weight_quantizer.dimension)

        # Create the new constant with the quantized weights
        oh = conv_node.height
        ow = conv_node.width
        od = conv_node.channel
        kh = conv_node.kernel_height
        kw = conv_node.kernel_width
        kd = conv_node.input_ops['X'].channel
        quantized_constant = Constant(
            weight_quantizer.name + '_new',
            Uint32(),
            data,
            packed=True,
            actual_shape=weight_quantizer.shape,
            transposed_data=_transpose_kernels(data, oh, ow, od, kh, kw, kd))

        # get nodes to be removed after being disconnected
        get_nodes_in_branch(weight_quantizer, None, to_be_removed)

        # Add the constant to the graph and connect the new constant
        graph.add_op(quantized_constant)
        quantized_constant.add_outputs(weight_quantizer.output_ops)
        for output_name, consumer_list in weight_quantizer.output_ops.items():
            for consumer_node in consumer_list:
                for input_name, input_node in consumer_node.input_ops.items():
                    if input_node == weight_quantizer:
                        consumer_node.add_input(input_name, quantized_constant)
                        break

    for op in to_be_removed:
        graph.remove_op(op)
def pass_simplify_batchnorm(graph: Graph) -> None:
    """Simplify BatchNorm operator."""
    exec_list = [x for x in sort_graph(graph) if x.op_type == 'BatchNormalization']

    to_be_removed = []
    for node in exec_list:
        scale = node.input_ops['scale']
        if scale.op_type != 'Constant':
            raise RuntimeError('scale for BatchNormalization must be Constant')
        B = node.input_ops['B']
        if B.op_type != 'Constant':
            raise RuntimeError('B for BatchNormalization must be Constant')
        mean = node.input_ops['mean']
        if mean.op_type != 'Constant':
            raise RuntimeError('mean for BatchNormalization must be Constant')
        var = node.input_ops['var']
        if var.op_type != 'Constant':
            raise RuntimeError('var for BatchNormalization must be Constant')

        new_name = node.name + '_optimized'
        # Fold mean/var into an affine transform: y = new_scale * x + new_bias
        new_scale_data = scale.data / np.sqrt(var.data + node.epsilon)
        new_scale = Constant(new_name + '_scale', scale.dtype, new_scale_data,
                             dimension_format=scale.dimension)
        new_bias_data = B.data - new_scale_data * mean.data
        new_bias = Constant(new_name + '_bias', B.dtype, new_bias_data,
                            dimension_format=B.dimension)
        new_op = BatchNormalizationOptimized(new_name, node.shape, node.dtype,
                                             {
                                                 'X': node.input_ops['X'],
                                                 'scale': new_scale,
                                                 'bias': new_bias
                                             },
                                             dimension_format=node.dimension)
        new_scale.add_output('output', new_op)
        new_bias.add_output('output', new_op)

        input_op = node.input_ops['X']
        update_key = None
        new_outputs = [new_op]
        for key, inout_ops in input_op.output_ops.items():
            if node in inout_ops:
                update_key = key
                for op in inout_ops:
                    if op != node:
                        new_outputs.append(op)
        if update_key is not None:
            input_op.remove_output(update_key)
            input_op.add_outputs({update_key: new_outputs})

        out_ops = node.output_op_list
        for op in out_ops:
            update_key = None
            for key, outin_op in op.input_ops.items():
                if outin_op == node:
                    update_key = key
            if update_key is not None:
                op.add_input(update_key, new_op)
            new_op.add_output('Y', op)

        graph.add_op(new_scale)
        graph.add_op(new_bias)
        graph.add_op(new_op)

        to_be_removed += [node, scale, B, mean, var]

    for node in to_be_removed:
        graph.remove_op(node)
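# Quick numeric check of the algebra behind this pass, with illustrative
# values: scale * (x - mean) / sqrt(var + eps) + B == new_scale * x + new_bias.
rng = np.random.default_rng(0)
x = rng.standard_normal(4)
scale = rng.standard_normal(4)
B = rng.standard_normal(4)
mean = rng.standard_normal(4)
var = rng.random(4) + 0.1
eps = 1e-5

new_scale = scale / np.sqrt(var + eps)
new_bias = B - new_scale * mean
assert np.allclose(new_scale * x + new_bias,
                   scale * (x - mean) / np.sqrt(var + eps) + B)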