def quantize(self, G: NNGraph) -> OrderedDict:
    edge_recs = {}
    result = OrderedDict()
    for step in G.graph_state.steps:
        node = step['node']
        if isinstance(node, InputParameters):
            in_qs = []
        else:
            in_qs = [edge_recs[edge.params]
                     for edge in G.indexed_in_edges(node.name)]
        if isinstance(node, FusionParameters):
            fin_qs = in_qs
            for fnode in node.contained_nodes():
                qrec = self.calculate_q(
                    fnode,
                    self._activation_stats.get(NodeId(node, fnode)),
                    self._filter_stats.get(NodeId(node, fnode)),
                    fin_qs,
                    self._min_qsnr,
                    self._force_width)
                result[NodeId(node, fnode)] = qrec
                fin_qs = qrec.out_qs
            qrec = QuantizationRecord(in_qs=in_qs, out_qs=fin_qs)
        else:
            qrec = self.calculate_q(
                node,
                self._activation_stats.get(NodeId(node, None)),
                self._filter_stats.get(NodeId(node, None)),
                in_qs,
                self._min_qsnr,
                self._force_width)
        result[NodeId(node, None)] = qrec
        if not qrec:
            break
        for edges in G.indexed_out_edges(node.name):
            for edge in edges:
                edge_recs[edge.params] = qrec.out_qs[edge.from_idx]
    return result

def quantize(self, G: NNGraph) -> OrderedDict:
    '''quantize the graph'''
    if G.has_quantized_parameters:
        self.dequantize(G)
        G.has_quantized_parameters = False
        G.quantization = None
    self.qrecs = QuantizationSet()
    edge_recs = {}
    opts = {
        'force_width': self._force_width,
        'quantized_dimension': self._quantized_dimension,
        'narrow_weights': self._narrow_weights
    }
    opts.update(self._options)
    quant_kwargs = {
        'opts': opts,
        'all_stats': self._activation_stats,
        'G': G,
        'qrecs': self.qrecs
    }
    dtype = WIDTH_TO_DTYPE[self._force_width]
    self.quantize_forward(edge_recs, dtype=dtype, **quant_kwargs)
    self.qrecs['__quantizer'] = self
    G.graph_identity.quantization_type = 'SQ8'
    return self.qrecs

def quantize(self, G: NNGraph) -> OrderedDict:
    '''quantize the graph'''
    if G.has_quantized_parameters:
        self.dequantize(G)
        G.has_quantized_parameters = False
        G.quantization = None
    edge_recs = {}
    dtype = WIDTH_TO_DTYPE[self._force_width]
    qrecs = self.quantize_forward(G, edge_recs, dtype)
    qrecs['__quantizer'] = self
    G.graph_identity.quantization_type = 'SQ8'
    return qrecs

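# A minimal driver sketch for the quantize() entry point above, assuming a
# quantizer instance constructed elsewhere from collected activation
# statistics; only quantize(G) and the G.quantization attribute come from
# the surrounding code, the rest is illustrative.
def requantize_graph(G, quantizer):
    # quantize() dequantizes first when the graph already carries quantized
    # parameters, so repeated calls are safe
    qrecs = quantizer.quantize(G)
    G.quantization = qrecs  # attach the returned QuantizationSet
    return qrecs
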
def propagate_downwards(G: NNGraph):
    for node in G.dfs():
        # First propagate the in dim hints to the out dim hints.
        # Any node that does not want this to happen should set its out dim hints.
        if node.in_dims_hint is not None:
            if isinstance(node, ReshapeParameters):
                if len(node.old_shape) == len(node.in_dims_hint[0]):
                    LOG.debug("set reshape %s in dims hint %s",
                              node.name, node.in_dims_hint[0])
                    node.old_shape.apply_naming_hints(node.in_dims_hint[0])
            elif isinstance(node, GlobalPoolParameters):
                if node.keep_dims:
                    node.out_dims_hint = deepcopy(node.in_dims_hint)
            elif isinstance(node, MatrixBroadcastedLinearOpParameters):
                max_hint = None
                for hint in node.in_dims_hint:
                    if hint is not None and (max_hint is None or len(hint) > len(max_hint)):
                        max_hint = hint
                if max_hint is not None:
                    node.out_dims_hint = [max_hint]
            elif isinstance(node, ConcatParameters):
                # if any incoming edge of the concat doesn't have a hint
                # set it the same as the others
                any_in_hint = next(
                    (hint for hint in node.in_dims_hint if hint is not None), None)
                if any_in_hint:
                    LOG.debug("set concat %s in dims hint %s",
                              node.name, any_in_hint)
                    for edge in G.in_edges(node.name):
                        if not node.in_dims_hint[edge.to_idx]:
                            node.in_dims_hint[edge.to_idx] = any_in_hint
                    node.out_dims_hint = [any_in_hint]
            else:
                if node.out_dims_hint is None:
                    node.out_dims_hint = deepcopy(node.in_dims_hint)
        # if we have an out dim hint then propagate it to downstream nodes
        if node.out_dims_hint is not None:
            LOG.debug("propagate down hint from %s", node.name)
            for edge in G.out_edges(node.name):
                hint = node.out_dims_hint[edge.from_idx]
                if hint is None:
                    continue
                if edge.to_node.in_dims_hint is None:
                    edge.to_node.in_dims_hint = SparseList()
                if edge.to_node.in_dims_hint[edge.to_idx] is None:
                    edge.to_node.in_dims_hint[edge.to_idx] = hint

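# The hint propagation above relies on SparseList semantics: reading an
# index that was never set returns None instead of raising IndexError,
# which is why `in_dims_hint[edge.to_idx] is None` is safe right after
# `in_dims_hint = SparseList()`. A minimal stand-in with the assumed
# behaviour (the real nntool class has more features):
class SparseListSketch(list):
    def __getitem__(self, idx):
        # unset (out of range) slots read back as None
        return super().__getitem__(idx) if idx < len(self) else None

    def __setitem__(self, idx, val):
        # grow the list with None padding so any index is assignable
        while len(self) <= idx:
            self.append(None)
        super().__setitem__(idx, val)
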
def quantize_forward(self, G: NNGraph, edge_recs, result=None):
    if result is None:
        result = QuantizationSet()
    for node in [step['node'] for step in G.graph_state.steps]:
        LOG.debug("quantize forward %s", node.name)
        in_qs = self.get_in_qs(G, edge_recs, node)
        if isinstance(node, ConvFusionParameters):
            qrec, qrecs = self.quantize_fusion(G, node, in_qs)
            for node_id, fqrec in qrecs.items():
                result[node_id] = fqrec
        elif isinstance(node, ConcatParameters):
            qrec = self.quantize_backward(G, result, edge_recs, node)
        else:
            qrec = self.calculate_q(
                node,
                self._activation_stats.get(NodeId(node, None)),
                self._filter_stats.get(NodeId(node, None)),
                in_qs,
                self._min_qsnr,
                self._force_width)
        result[NodeId(node, None)] = qrec
        if not qrec:
            break
        for edges in G.indexed_out_edges(node.name):
            for edge in edges:
                edge_recs[edge.params] = qrec.out_qs[edge.from_idx]
    return result

def replace_function(self, G: NNGraph, subgraph: GraphView):
    # simple node order is necessary because nodes() will not necessarily
    # be in order
    step = 0
    for node in subgraph.nodes():
        node.step_idx = step
        step = step + 1
        if isinstance(node, FcParameters):
            linear_name = node.name + "_fusion"
            break
    LOG.info("fusing nodes %s",
             ",".join((node.name for node in subgraph.nodes())))
    pnode = ConvFusionParameters(linear_name,
                                 fusion_type="linear_active",
                                 subgraph=subgraph)
    if G.quantization:
        qrecs = G.quantization.get_all(pnode.contained_nodes())
        if qrecs:
            if isinstance(qrecs[0], (SymmetricQuantizationRecord,
                                     SymmetricScalableFilterQuantizationRecord)):
                prec = SymmetricQuantizationRecord(
                    in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
            elif isinstance(qrecs[0], (MultQuantizationRecord,
                                       MultScalableFilterQuantizationRecord)):
                prec = MultQuantizationRecord(
                    in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
            elif isinstance(qrecs[0], (Float32QuantizationRecord,
                                       Float32ScalableFilterQuantizationRecord)):
                prec = Float32QuantizationRecord(
                    in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
            for node in pnode.contained_nodes():
                G.quantization.move_to_fusion(node, pnode)
            G.quantization[NodeId(pnode)] = prec
    return pnode, None, None

def replace_function(self, G: NNGraph, subgraph: GraphView):
    nodes = list(subgraph.nodes())
    pnode = ActivationFusion(nodes[0].name + "fusion",
                             nodes[0].op_name + "_active",
                             subgraph)
    nodes[0].step_idx = 0
    nodes[1].step_idx = 1
    LOG.debug("fused nodes %s", ",".join((node.name for node in nodes)))
    if G.quantization:
        qrecs = G.quantization.get_all(subgraph.nodes())
        if qrecs:
            if isinstance(qrecs[0], (SymmetricQuantizationRecord,
                                     SymmetricScalableFilterQuantizationRecord)):
                prec = SymmetricQuantizationRecord(
                    in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
            elif isinstance(qrecs[0], (MultQuantizationRecord,
                                       MultScalableFilterQuantizationRecord)):
                prec = MultQuantizationRecord(
                    in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
            elif isinstance(qrecs[0], (Float32QuantizationRecord,
                                       Float32ScalableFilterQuantizationRecord)):
                prec = Float32QuantizationRecord(
                    in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
            for node in subgraph.nodes():
                G.quantization.move_to_fusion(node, pnode)
            G.quantization[NodeId(pnode)] = prec
    return pnode

def propagate_upwards(G: NNGraph):
    for node in G.dfs(reverse=True):
        # First propagate the out dim hints to the in dim hints.
        # Any node that does not want this to happen should set its in dim hints.
        if node.out_dims_hint is not None:
            if isinstance(node, ReshapeParameters):
                if len(node.shape) < len(node.out_dims_hint[0]):
                    node.shape = Dim.unnamed(
                        ([1] * (len(node.out_dims_hint[0]) - len(node.shape)))
                        + node.shape.shape)
                node.shape.apply_naming_hints(node.out_dims_hint[0])
                if node.in_dims_hint is None:
                    node.in_dims_hint = SparseList(
                        [["%s" % i for i in range(len(node.old_shape))]])
            elif isinstance(node, MatrixBroadcastedLinearOpParameters):
                node.in_dims_hint = [node.out_dims_hint[0]] * 2
            elif isinstance(node, MatrixMulParameters):
                continue
            elif isinstance(node, GlobalPoolParameters):
                if node.keep_dims:
                    node.in_dims_hint = deepcopy(node.out_dims_hint)
            elif isinstance(node, ConstantInputParameters) and not node.dims.is_named:
                node.dims.apply_naming_hints(node.out_dims_hint[0])
            else:
                if node.in_dims_hint is None:
                    node.in_dims_hint = deepcopy(node.out_dims_hint)
        # if we have an in dim hint then propagate it to upstream nodes
        if node.in_dims_hint is not None:
            for edge in G.in_edges(node.name):
                hint = node.in_dims_hint[edge.to_idx]
                if hint is None:
                    continue
                if edge.from_node.out_dims_hint is None:
                    edge.from_node.out_dims_hint = SparseList()
                if edge.from_node.out_dims_hint[edge.from_idx] is None:
                    edge.from_node.out_dims_hint[edge.from_idx] = hint
                if isinstance(edge.from_node, InputParameters):
                    assert edge.from_idx == 0, "input node should only have one output"
                    dims_len = len(edge.from_node.dims)
                    hint_len = len(hint)
                    if dims_len < hint_len:
                        edge.from_node.dims = Dim.unnamed(
                            [1] * (hint_len - dims_len) + edge.from_node.dims.shape)

def _common(cls, node, **kwargs):
    all_nodes = kwargs['all_nodes']
    G = kwargs['G']
    valid_name = kwargs['valid_name']
    inputs = [all_nodes[inp] for inp in node.input]
    if not all(cls.is_constant(inp) for inp in inputs):
        raise NotImplementedError(
            "nntool does not support import of graphs with evaluated loops")
    importer = kwargs['importer']
    sub_G = NNGraph()
    all_nodes_clone = all_nodes.copy()
    importer.import_subgraph(sub_G, node.attrs['body'], {},
                             all_nodes=all_nodes_clone)
    if not all(isinstance(inp, (InputParameters, ConstantInputParameters))
               for inp in sub_G.inputs()):
        raise NotImplementedError(
            "nntool does not support import of graphs with evaluated loops")
    sub_G.add_dimensions()
    for idx, inp in enumerate(sub_G.inputs()):
        inp.index = idx
    logger.info(f"reducing loop {valid_name} to a constant")
    count = inputs[0][0].value
    keep_going = inputs[1][0].value
    loop_carried = [inp[0].value for inp in inputs[2:]]
    # None marks 'no value yet' so that the first scan value is stored
    # rather than concatenated
    outputs = [None] * len(node.output)
    while keep_going and count > 0:
        executer = GraphExecuter(sub_G)
        output_tensors = executer.execute([count, keep_going] + loop_carried,
                                          silent=True)
        outp_vals = [output_tensors[node.step_idx][0]
                     for node in sub_G.outputs()
                     if not isinstance(node, InputParameters)]
        keep_going = outp_vals[0]
        for idx, val in enumerate(outp_vals[1:]):
            if idx < len(loop_carried):
                loop_carried[idx] = outputs[idx] = val
            elif outputs[idx] is None:
                outputs[idx] = val
            else:
                outputs[idx] = np.concatenate((outputs[idx], val))
        count -= 1
    for idx, outp in enumerate(node.output):
        params = ConstantInputParameters(
            G.unique_name(f'{valid_name}_out{idx}'),
            value=outputs[idx],
            dims=Dim.unnamed(outputs[idx].shape))
        all_nodes[outp] = (params, 0, ProvisionalDim(outputs[idx].shape), None)
    return None

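# For reference, a numpy-only sketch of the ONNX Loop contract that the
# handler above constant-folds: inputs are (trip_count, keep_going,
# *loop_carried); each body iteration returns a new keep_going flag, the
# updated loop-carried values and per-iteration scan outputs, which are
# concatenated across iterations. `body` stands in for executing the
# imported subgraph; the names here are illustrative, not nntool API.
import numpy as np

def fold_loop(count, keep_going, loop_carried, body, num_scan_outputs=0):
    scans = [None] * num_scan_outputs
    while keep_going and count > 0:
        keep_going, loop_carried, scan_vals = body(count, keep_going, loop_carried)
        for i, val in enumerate(scan_vals):
            # first iteration stores the value, later ones concatenate
            scans[i] = val if scans[i] is None else np.concatenate((scans[i], val))
        count -= 1
    return list(loop_carried) + scans
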
def dequantize(self, G: NNGraph): qrecs = G.quantization LOG.info("dequantizing graph parameters") for _, node, _, fnode in G.nodes_iterator(): qrec = qrecs[NodeId(node, fnode)] anode = node if fnode is None else fnode handler = self.handlers[1].get(anode.__class__) if handler: handler.dequantize(anode, qrec)
def initialize_edge_recs(G: NNGraph, qrecs):
    '''Initialize edge rec dictionary to current quantization settings'''
    edge_recs = {}
    for node in [step['node'] for step in G.graph_state.steps]:
        nodeid = NodeId(node)
        qrec = qrecs[nodeid]
        for edges in G.indexed_out_edges(node.name):
            for edge in edges:
                edge_recs[edge.params] = qrec.out_qs[edge.from_idx]
    return edge_recs

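# Sketch of how initialize_edge_recs() is used: seed the per-edge QType map
# from the graph's current quantization before re-propagating a changed
# record with propagate_forward() (defined further down). The driver
# function itself is illustrative.
def reseed_and_propagate(quantizer, G, start_node, new_out_qrec):
    qrecs = G.quantization
    edge_recs = initialize_edge_recs(G, qrecs)  # maps edge params -> output QType
    quantizer.propagate_forward(G, edge_recs, start_node, new_out_qrec, qrecs)
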
def create_graph(filename, opts):
    cfg = read_cfg(filename)
    out_graph = NNGraph(model=cfg,
                        filename=filename,
                        name=opts.get('name'),
                        value_cache=opts.get('value_cache'))
    create_subgraph(out_graph, cfg)
    leaf_nodes = [n for n in out_graph.nodes()
                  if out_graph.out_degree(n) == 0 and out_graph.in_degree(n) > 0]
    for node in leaf_nodes:
        out_graph.add_edge(node, out_graph.add_output(), order=0)
    return out_graph

def create_graph(self, filename, opts):
    model = onnx.load(filename)
    self._name_cache = {}
    if model.ir_version < 3:
        opset_import = [make_opsetid(defs.ONNX_DOMAIN, 1)]
    else:
        opset_import = model.opset_import
    G = NNGraph(filename=filename,
                name=opts.get('name'),
                constant_store=ConstantStore())
    return self._import_onnx_model(G, model.graph, opset_import, opts)

def create_graph(self, filename, opts) -> NNGraph:
    opts = self.get_opts(opts)
    model = onnx.load(filename)
    # onnx.checker.check_model(model)
    try:
        model = shape_inference.infer_shapes(model)
    except RuntimeError as ex:
        msg = "\n".join(f"> {line}" for line in str(ex).split("\n") if line)
        logger.warning(
            'shape inference failed on onnx graph. '
            f'This may not affect import.\nONNX runtime error was:\n{msg}')
    self._name_cache = {}
    if model.ir_version < 3:
        opset_import = [make_opsetid(defs.ONNX_DOMAIN, 1)]
    else:
        opset_import = model.opset_import
    G = NNGraph(filename=filename, name=opts.get('name'))
    G, qrecs = self._import_onnx_model(G, model.graph, opset_import, opts)
    G.add_dimensions(quiet=True)
    if qrecs:
        propagate_qrecs(G, qrecs)
        qset = QuantizationSet()
        qset.update(qrecs)
        qset.scheme_priority = ['SQ8']
        qset.schemes_present = {'SQ8'}
        G.quantization = qset
        try:
            quantizer = NewQuantizer(G)
            quantizer.quantize()
        except ValueError as ex:
            logger.warning(
                f'unable to import quantization from FakeQuantize nodes correctly - {ex}')
    clean_dangling_nodes(G)
    MatchDuplicateConstants().match(G)
    return G

def add_node(G: NNGraph, node: Node, anode: Node = None) -> Tuple[str, str]:
    G.add_node(node)
    if not anode:
        return (node.name, node.name)
    G.add_node(anode)
    G.add_edge(NNEdge(node, anode))
    return (node.name, anode.name)

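# Usage sketch for add_node(): insert a node, optionally chained to a
# following activation node, and receive the (entry_name, exit_name) pair
# to wire the rest of the graph against. The wrapper itself is illustrative.
def add_node_with_activation(G, prev_node, params, act_params):
    entry_name, exit_name = add_node(G, params, anode=act_params)
    # NNEdge accepts node objects, as in add_node() above: connect the
    # previous node to the newly inserted one
    G.add_edge(NNEdge(prev_node, params))
    return entry_name, exit_name
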
def fix_split_in_edges(G: NNGraph):
    for split in [node for node in G.nodes() if isinstance(node, SplitParameters)]:
        in_edge = G.in_edges(split.name)[0]
        if in_edge.to_idx == 0:
            continue
        # reconnect the edge so that it arrives on the split's input 0
        G.remove_edge(in_edge)
        G.add_edge(NNEdge(in_edge.from_node, in_edge.to_node,
                          from_idx=in_edge.from_idx))

def propagate_downwards(G: NNGraph):
    for node in G.dfs():
        # First propagate the in dim hints to the out dim hints.
        # Any node that does not want this to happen should set its out dim hints.
        if node.in_dims_hint is not None:
            if isinstance(node, ReshapeParameters):
                assert len(node.old_shape) == len(node.in_dims_hint[0]), \
                    "reshape doesn't match input"
                node.old_shape.apply_naming_hints(node.in_dims_hint[0])
            else:
                if node.out_dims_hint is None:
                    node.out_dims_hint = deepcopy(node.in_dims_hint)
        # if we have an out dim hint then propagate it to downstream nodes
        if node.out_dims_hint is not None:
            for edge in G.out_edges(node.name):
                hint = node.out_dims_hint[edge.from_idx]
                if edge.to_node.in_dims_hint is None:
                    edge.to_node.in_dims_hint = SparseList()
                if edge.to_node.in_dims_hint[edge.to_idx] is None:
                    edge.to_node.in_dims_hint[edge.to_idx] = hint

def dequantize(self, G: NNGraph): qrecs = G.quantization LOG.info("dequantizing graph parameters") for _, node, _, fnode in G.nodes_iterator(): qrec = qrecs[NodeId(node, fnode)] if isinstance(node, ConstantInputParameters): node.value = qrec.out_q[0].dequantize(node.value) else: anode = node if fnode is None else fnode if isinstance(anode, (FcParameters, Conv2DParameters)): if anode.has_bias: anode.biases = qrec.biases_q.dequantize(anode.biases) anode.weights = qrec.weights_q.dequantize(anode.weights)
def propagate_upwards(G: NNGraph):
    for node in G.dfs(reverse=True):
        # First propagate the out dim hints to the in dim hints.
        # Any node that does not want this to happen should set its in dim hints.
        if node.out_dims_hint is not None:
            if isinstance(node, ReshapeParameters):
                assert len(node.shape) == len(node.out_dims_hint[0])
                node.shape.apply_naming_hints(node.out_dims_hint[0])
                if node.in_dims_hint is None:
                    node.in_dims_hint = SparseList(
                        [["%s" % i for i in range(len(node.old_shape))]])
            else:
                if node.in_dims_hint is None:
                    node.in_dims_hint = deepcopy(node.out_dims_hint)
        # if we have an in dim hint then propagate it to upstream nodes
        if node.in_dims_hint is not None:
            for edge in G.in_edges(node.name):
                hint = node.in_dims_hint[edge.to_idx]
                if edge.from_node.out_dims_hint is None:
                    edge.from_node.out_dims_hint = SparseList()
                if edge.from_node.out_dims_hint[edge.from_idx] is None:
                    edge.from_node.out_dims_hint[edge.from_idx] = hint

def report(self, G: NNGraph, stats):
    dump_stats = OrderedDict()
    for step_idx, node, fusion_idx, fnode in G.nodes_iterator(self._yield_fusions):
        stat = stats[NodeId(node, fnode)]
        stat = copy.deepcopy(stat)
        if fusion_idx:
            name = "{}_{}".format(node.name, fusion_idx)
            idx = "{}_{}".format(step_idx, fusion_idx)
        else:
            name = node.name
            idx = str(step_idx)
        dump_stats[name] = stat
        stat['idx'] = idx
    return dump_stats_table(dump_stats,
                            do_totals=self._do_totals,
                            threshold=self._threshold)

def create_graph(filename, opts):
    buf = open(filename, "rb").read()
    model = Model.Model.GetRootAsModel(buf, 0)
    LOG.info("Importing TFLITE model version %s", model.Version())
    check(model.Version() == 3, "Only support version 3 graphs at present")
    check(model.SubgraphsLength() == 1, "Only supports one subgraph at present")
    G = NNGraph(model=model,
                filename=filename,
                name=opts.get('name'),
                value_cache=opts.get('value_cache'))
    create_subgraph(G, model, 0,
                    load_tensors=opts.get('load_tensors'),
                    dequantize=opts.get('dequantize'))
    return G

def create_graph(self, filename, opts):
    opts = self.get_opts(opts)
    model = onnx.load(filename)
    self._name_cache = {}
    if model.ir_version < 3:
        opset_import = [make_opsetid(defs.ONNX_DOMAIN, 1)]
    else:
        opset_import = model.opset_import
    G = NNGraph(filename=filename,
                name=opts.get('name'),
                constant_store=ConstantStore())
    G = self._import_onnx_model(G, model.graph, opset_import, opts)
    clean_dangling_nodes(G)
    MatchDuplicateConstants().match(G)
    return G

def quantize_forward(self, G: NNGraph, edge_recs, dtype=np.int8):
    for node in [step['node'] for step in G.graph_state.steps]:
        LOG.debug("quantize forward %s", node.name)
        in_qs = self.get_in_qs(G, edge_recs, node)
        if isinstance(node, (ConvFusionParameters, ActivationFusion)):
            qrec = self.quantize_fusion(G, node, in_qs, dtype)
        else:
            qrec = self.calculate_q(
                G, node,
                self._activation_stats.get(NodeId(node, None)),
                in_qs, dtype)
        self.qrecs[NodeId(node, None)] = qrec
        if not qrec:
            break
        for edges in G.indexed_out_edges(node.name):
            for edge in edges:
                edge_recs[edge.params] = qrec.out_qs[edge.from_idx]

def create_graph(self, filename, opts):
    opts = self.get_opts(opts)
    self._name_cache = {}
    add_sys_path(os.path.dirname(__file__))
    buf = open(filename, "rb").read()
    model = Model.GetRootAsModel(buf, 0)
    LOG.info("Importing TFLITE model version %s", model.Version())
    check(model.Version() == 3, "Only support version 3 graphs at present")
    if model.SubgraphsLength() > 1:
        LOG.warning("nntool only supports one subgraph. "
                    "There may be errors loading this graph.")
    G = NNGraph(model=model,
                filename=filename,
                name=opts.get('name'),
                constant_store=ConstantStore())
    if opts.get('load_quantization'):
        G.quantization = QuantizationSet()
        G.has_quantized_parameters = True
        G.graph_identity.quantization_types.add('SQ8')
    self._import_tflite_graph(G, TFLiteGraph.from_model(model, 0), opts)
    clean_dangling_nodes(G)
    fix_split_in_edges(G)
    MatchDuplicateConstants().match(G)
    G.add_dimensions()
    remove_concats(G)
    if opts['remove_quantize_ops']:
        RemoveQuantizeOperators().match(G)
        G.add_dimensions()
    if opts.get('load_quantization'):
        # get rid of qrecs on nodes that were not used
        to_remove = []
        for nid in G.quantization:
            if nid.node_name not in G:
                to_remove.append(nid)
        for nid in to_remove:
            del G.quantization[nid]
    return G

def propagate_forward(self, G: NNGraph, edge_recs, start_node, new_out_qrec, result):
    '''Propagate a new output qrec at node start_node in the graph'''
    found_node = False
    for node in [step['node'] for step in G.graph_state.steps]:
        if found_node:
            LOG.debug("propagate forwards %s", node.name)
            in_qs = self.get_in_qs(G, edge_recs, node)
            if isinstance(node, ConvFusionParameters):
                qrec, qrecs = self.quantize_fusion(G, node, in_qs)
                for node_id, fqrec in qrecs.items():
                    result[node_id] = fqrec
            elif isinstance(node, ConcatParameters):
                qrec = self.quantize_backward(G, result, edge_recs, node)
            else:
                qrec = self.calculate_q(
                    node,
                    self._activation_stats.get(NodeId(node, None)),
                    self._filter_stats.get(NodeId(node, None)),
                    in_qs,
                    self._min_qsnr,
                    self._force_width)
        else:
            if node == start_node:
                found_node = True
                qrec = self.quantize_backward(G, result, edge_recs, node,
                                              force_out=new_out_qrec)
            else:
                continue
        result[NodeId(node, None)] = qrec
        if not qrec:
            break
        for edges in G.indexed_out_edges(node.name):
            for edge in edges:
                edge_recs[edge.params] = qrec.out_qs[edge.from_idx]

def report(self, G: NNGraph, nodes=None) -> Tabular:
    if nodes is None:
        nodes = G.nodes()
    nodes = sorted(nodes, key=lambda x: x.step_idx)
    start_step = nodes[0].step_idx
    end_step = nodes[-1].step_idx
    steps = G.graph_state.steps
    liveness = G.graph_state.liveness
    first_node = steps[start_step]['node']
    active_order = "x".join(first_node.out_dims[0].order)
    tab = Tabular()
    self.do_headers(active_order, tab)
    max_active = 0
    tot_params = 0
    tot_ops = 0
    for i, node, active, params_size, ops in graph_walk(steps, liveness):
        if node.step_idx < start_step or node.step_idx > end_step:
            continue
        tot_params += params_size
        if ops:
            tot_ops += ops
        if active > max_active:
            max_active = active
        if self._show_constants or not isinstance(node, ConstantInputParameters):
            self.do_operation(node, G, tab, i, active, params_size, ops)
    if start_step != end_step:
        self.check_do_totals(tab, max_active, tot_params, tot_ops)
    return tab

def two_conv_graph():
    G = NNGraph(name='two_conv_graph')
    ti = G.add_input(Dim.unnamed([10, 10, 2]))
    c1filt = Conv2DFilterDim(3, 3, 2, in_c=2)
    c1filt.impose_order(['out_c', 'h', 'w', 'in_c'])
    n1 = Conv2DParameters("node1",
                          filt=c1filt,
                          stride=StrideDim(1, 1),
                          padding=PadDim(0),
                          in_dims_hint=SparseList([['h', 'w', 'c']]),
                          out_dims_hint=SparseList([['h', 'w', 'c']]))
    G.add_node(n1)
    w1 = [[0.25, 0.25], [0.25, 0.25], [0.25, 0.25]]
    w1 = [w1, w1, w1]
    w2 = [[0.75, 0.75], [0.75, 0.75], [0.75, 0.75]]
    w2 = [w2, w2, w2]
    n1.weights = np.array([w1, w2])
    c2filt = Conv2DFilterDim(3, 3, 2, in_c=2)
    c2filt.impose_order(['out_c', 'h', 'w', 'in_c'])
    n2 = Conv2DParameters("node2",
                          filt=c2filt,
                          stride=StrideDim(1, 1),
                          padding=PadDim(0),
                          in_dims_hint=SparseList([['h', 'w', 'c']]),
                          out_dims_hint=SparseList([['h', 'w', 'c']]))
    G.add_node(n2)
    w3 = [[0.75, 0.25], [0.75, 0.25], [0.75, 0.25]]
    w3 = [w3, w3, w3]
    n2.weights = np.array([w3, w3])
    to = G.add_output()
    G.add_edge(NNEdge(ti, n1))
    G.add_edge(NNEdge(n1, n2))
    G.add_edge(NNEdge(n2, to))
    G.add_dimensions()
    yield G

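# two_conv_graph() above is written as a generator so it can be registered
# as a pytest fixture (the @pytest.fixture decoration is assumed to sit on
# the definition site). A usage sketch:
def test_two_conv_graph(two_conv_graph):
    G = two_conv_graph
    # input, two convolutions and an output node
    assert len(G.nodes()) == 4
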
def report(self, G: NNGraph, nodes=None, graph_format='PDF', all_dims=False,
           filename=None, view=True, anonymise=False, expressions=False,
           quant_labels=False):
    if nodes is None:
        nodes = set(G.nodes())
    self.init_name_cache()
    all_ports = {}
    graph_name = G.graphname if hasattr(G, 'graphname') else 'graph'
    dot = Digraph(comment=graph_name,
                  format=graph_format,
                  node_attr={'height': '.1'},
                  edge_attr={'fontsize': '10.0'})
    fake_idx = 0
    for node in G.dfs():
        if node not in nodes:
            continue
        if expressions and isinstance(node, ExpressionFusionParameters):
            all_ports[node] = self.report_expression(
                dot, G, node,
                anonymise=anonymise,
                report_quantized=expressions == "quantized")
        else:
            num_in_edges = len(G.indexed_in_edges(node.name))
            num_out_edges = len(G.indexed_out_edges(node.name))
            ports = all_ports.setdefault(node, [None] * 2)
            names = self.build_nodebox(node, ports, num_in_edges,
                                       num_out_edges, anon=anonymise)
            dot.node(node.name, nohtml(names), shape='record',
                     xlabel=str(node.step_idx))
        for edge in G.in_edges(node.name):
            if edge.from_node not in nodes:
                if not all_dims:
                    continue
            out_port, in_port = self.get_ports(all_ports, edge)
            if edge.from_node in nodes:
                from_node_id = self.get_from_id(all_ports, edge, out_port)
                to_node_id = self.get_to_id(all_ports, edge, in_port)
                dot.edge(from_node_id, to_node_id,
                         xlabel=self.in_label(G, node, edge.to_idx, quant_labels))
            else:
                fake_name = f'fake_{fake_idx}'
                fake_idx += 1
                dot.node(fake_name, shape='point', fillcolor='black')
                to_node_id = self.get_to_id(all_ports, edge, in_port)
                dot.edge(fake_name, to_node_id,
                         xlabel=self.in_label(G, node, edge.to_idx, quant_labels))
        if not all_dims:
            continue
        for edge_group in G.indexed_out_edges(node.name):
            if any(edge.to_node in nodes for edge in edge_group):
                continue
            edge = edge_group[0]
            out_port, _ = self.get_ports(all_ports, edge)
            fake_name = f'fake_{fake_idx}'
            fake_idx += 1
            dot.node(fake_name, shape='plaintext', label=' ', fillcolor='black')
            from_node_id = self.get_from_id(all_ports, edge, out_port)
            dot.edge(from_node_id, fake_name,
                     xlabel=self.out_label(G, node, edge.from_idx, quant_labels))
    # dot = dot.unflatten(stagger=2)
    if filename:
        dot.render(filename, cleanup=True)
    if view:
        filename = tempfile.mktemp('.gv')
        dot.view(filename, cleanup=True, quiet=True)
    self.reset_name_cache()

def quantize_backward(self, G: NNGraph, result, edge_recs, node, force_out=None):
    LOG.debug("quantize backwards %s", node.name)
    recalculated = False
    while True:
        in_qs = self.get_in_qs(G, edge_recs, node)
        if self.is_filter_node(node):
            if isinstance(node, ConvFusionParameters):
                qrec, qrecs = self.quantize_fusion(G, node, in_qs,
                                                   force_out=force_out)
                for node_id, fqrec in qrecs.items():
                    result[node_id] = fqrec
            else:
                qrec = self.calculate_q(
                    node,
                    self._activation_stats.get(NodeId(node, None)),
                    in_qs,
                    self._force_width,
                    force_out=force_out)
            if force_out and force_out.q is not None and qrec.out_qs[0].q < force_out.q:
                if recalculated:
                    raise NotImplementedError("no quantization solution found")
                bits_to_gain = force_out.q - qrec.out_qs[0].q
                if bits_to_gain > in_qs[0].q:
                    raise NotImplementedError()
                # Try to adjust the inputs to satisfy and then recalculate
                pnode = G.in_edges(node.name)[0].from_node
                self.quantize_backward(G, result, edge_recs, pnode,
                                       force_out=QType(bits=force_out.bits,
                                                       q=in_qs[0].q - bits_to_gain,
                                                       signed=True))
        elif isinstance(node, ConcatParameters):
            assert not recalculated
            max_width = max(in_q.bits for in_q in in_qs)
            min_q = min(in_q.q for in_q in in_qs)
            if force_out:
                if not self.satisfied(force_out.bits, max_width):
                    max_width = force_out.bits
                if not self.satisfied(force_out.q, min_q):
                    min_q = force_out.q
            LOG.debug("normalizing concat to %s",
                      QType(bits=max_width, q=min_q, signed=True))
            for pidx, pnode in enumerate([edge.from_node
                                          for edge in G.in_edges(node.name)]):
                pqrec = in_qs[pidx]
                if pqrec.q != min_q or pqrec.bits != max_width:
                    self.quantize_backward(G, result, edge_recs, pnode,
                                           force_out=QType(bits=max_width,
                                                           q=min_q,
                                                           signed=True))
            o_q = QType(bits=max_width, q=min_q, signed=True)
            qrec = SymmetricQuantizationRecord(
                in_qs=self.get_in_qs(G, edge_recs, node), out_qs=[o_q])
        elif isinstance(node, SoftMaxParameters):
            raise NotImplementedError("softmax kernel cannot change width or q")
        else:
            if isinstance(node, ConvFusionParameters):
                qrec, qrecs = self.quantize_fusion(G, node, in_qs,
                                                   force_out=force_out)
                for node_id, fqrec in qrecs.items():
                    result[node_id] = fqrec
            else:
                qrec = self.calculate_q(
                    node,
                    self._activation_stats.get(NodeId(node, None)),
                    in_qs,
                    self._force_width,
                    force_out=force_out)
            o_q = qrec.out_qs[0]
            if not (self.satisfied(force_out.q, o_q.q)
                    and self.satisfied(force_out.bits, o_q.bits)):
                if recalculated:
                    raise NotImplementedError("no quantization solution found")
                if len(G.in_edges(node.name)) > 1:
                    raise NotImplementedError(
                        "Nodes with multiple input edges need custom handling")
                pnode = G.in_edges(node.name)[0].from_node
                self.quantize_backward(G, result, edge_recs, pnode,
                                       force_out=force_out)
        for edges in G.indexed_out_edges(node.name):
            for edge in edges:
                edge_recs[edge.params] = qrec.out_qs[edge.from_idx]
        result[NodeId(node, None)] = qrec
        o_q = qrec.out_qs[0]
        if self.satisfied_force(force_out, o_q):
            break
        if recalculated:
            raise NotImplementedError("no quantization solution found")
        LOG.debug("recalculate %s", node.name)
        recalculated = True
    LOG.debug("back complete %s %s", node.name, qrec)
    return qrec

def report_graph(self, G: NNGraph, dot, all_ports, fake_idx, nodes=None,
                 all_dims=False, anonymise=False, expressions=False,
                 qrecs=None, fusions=False, parent=None):
    if nodes is None:
        nodes = set(G.nodes())
    for node in G.dfs():
        if node not in nodes:
            continue
        if isinstance(node, (FusionInputParameters)):
            continue
        if expressions and isinstance(node, ExpressionFusionParameters):
            all_ports[node] = self.report_expression(
                dot, G, node,
                anonymise=anonymise,
                report_quantized=expressions == "quantized")
        elif fusions and isinstance(node, FusionBase):
            all_ports[node] = self.report_fusion(dot, G, node, all_ports,
                                                 fake_idx,
                                                 all_dims=all_dims,
                                                 anonymise=anonymise,
                                                 expressions=expressions,
                                                 qrecs=qrecs)
        else:
            num_in_edges = len(G.indexed_in_edges(node.name))
            num_out_edges = len(G.indexed_out_edges(node.name))
            ports = all_ports.setdefault(node, [None] * 2)
            if not isinstance(node, FusionOutputParameters):
                names = self.build_nodebox(node, ports, num_in_edges,
                                           num_out_edges, anon=anonymise)
                dot.node(node.name, nohtml(names), shape='record',
                         xlabel=str(node.step_idx),
                         color="blue" if node.is_not_generated else "black")
        for edge in G.in_edges(node.name):
            if edge.from_node not in nodes:
                if not all_dims:
                    continue
            out_port, in_port = self.get_ports(all_ports, edge)
            if edge.from_node in nodes:
                from_node_id = self.get_from_id(all_ports, edge, out_port)
                to_node_id = self.get_to_id(all_ports, edge, in_port)
                edge_label, edge_error = self.in_label(
                    G, edge, qrecs, parent=parent,
                    from_node=not isinstance(edge.from_node, FusionInputParameters),
                    to_node=not isinstance(edge.to_node, FusionOutputParameters))
                dot.edge(from_node_id, to_node_id, xlabel=edge_label,
                         color="red" if edge_error else "black")
            else:
                fake_name = f'fake_{fake_idx}'
                fake_idx += 1
                dot.node(fake_name, shape='point', fillcolor='black')
                to_node_id = self.get_to_id(all_ports, edge, in_port)
                edge_label, edge_error = self.in_label(G, edge, qrecs,
                                                       parent=parent)
                dot.edge(fake_name, to_node_id, xlabel=edge_label,
                         color="red" if edge_error else "black")
        if not all_dims:
            continue
        for edge_group in G.indexed_out_edges(node.name):
            if any(edge.to_node in nodes for edge in edge_group):
                continue
            edge = edge_group[0]
            out_port, _ = self.get_ports(all_ports, edge)
            fake_name = f'fake_{fake_idx}'
            fake_idx += 1
            dot.node(fake_name, shape='plaintext', label=' ',
                     fillcolor='black')
            from_node_id = self.get_from_id(all_ports, edge, out_port)
            edge_label, edge_error = self.out_label(
                G, edge, qrecs, parent=parent,
                from_node=not isinstance(edge.from_node, FusionInputParameters),
                to_node=not isinstance(edge.to_node, FusionOutputParameters))
            dot.edge(from_node_id, fake_name, xlabel=edge_label,
                     color="red" if edge_error else "black")