def expression(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Evaluate an expression node in float32, delegating to params.execute.

    Falls back to a fresh Float32QuantizationRecord when no qrec is given.
    """
    if qrec is None:
        qrec = Float32QuantizationRecord()
    prepared = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    outputs = params.execute(prepared, details=details)
    return qrec.get_outputs(params, outputs, ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Nearest-neighbour image resize in float32 (works in HWC layout internally)."""
    if qrec is None:
        qrec = Float32QuantizationRecord()
    src = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    src = src.transpose(params.in_dims[0].transpose_to_order(("h", "w", "c")))
    out_dim = params.out_dims[0]
    in_dim = params.in_dims[0]
    # map each output pixel onto the source grid and round to the nearest cell
    w_step = (in_dim.w - 1) / (out_dim.w - 1)
    h_step = (in_dim.h - 1) / (out_dim.h - 1)
    dest = np.empty((out_dim.h, out_dim.w, out_dim.c))
    for row in range(out_dim.h):
        src_row = int(round(h_step * row))
        for col in range(out_dim.w):
            src_col = int(round(w_step * col))
            dest[row, col, :] = src[src_row, src_col, :]
    dest = dest.transpose(out_dim.transpose_from_order(("h", "w", "c")))
    return qrec.get_outputs(params, [dest], ktype="float32")
def softmax(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Float32 softmax kernel.

    Raises on float overflow inside softmax_func, but restores the previous
    numpy error state afterwards.  The original called np.seterr without
    saving the old settings, permanently leaking `over='raise'` into the
    process-wide numpy configuration (the sibling class-based softmax kernel
    already saves and restores it).
    """
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    old_err = np.seterr(over='raise')
    try:
        in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
        out_tensor = softmax_func(in_tensor)
    finally:
        # restore even when softmax_func raises FloatingPointError
        np.seterr(**old_err)
    return qrec.get_outputs(params, [out_tensor], ktype="float32")
def replace_function(self, G: NNGraph, subgraph: GraphView):
    """Fuse a linear+activation subgraph into one ConvFusionParameters node.

    Assigns sequential step indices (nodes() is not guaranteed to be in
    order, so a simple running counter is used) and, when quantization is
    present, builds a fused qrec spanning first input to last output.
    """
    for step, node in enumerate(subgraph.nodes()):
        node.step_idx = step
        if isinstance(node, FcParameters):
            linear_name = node.name + "_fusion"
            break
    LOG.info("fusing nodes %s",
             ",".join(node.name for node in subgraph.nodes()))
    pnode = ConvFusionParameters(linear_name, fusion_type="linear_active",
                                 subgraph=subgraph)
    if G.quantization:
        qrecs = G.quantization.get_all(pnode.contained_nodes())
        if qrecs:
            head, tail = qrecs[0], qrecs[-1]
            if isinstance(head, (SymmetricQuantizationRecord,
                                 SymmetricScalableFilterQuantizationRecord)):
                prec = SymmetricQuantizationRecord(in_qs=head.in_qs,
                                                   out_qs=tail.out_qs)
            elif isinstance(head, (MultQuantizationRecord,
                                   MultScalableFilterQuantizationRecord)):
                prec = MultQuantizationRecord(in_qs=head.in_qs,
                                              out_qs=tail.out_qs)
            elif isinstance(head, (Float32QuantizationRecord,
                                   Float32ScalableFilterQuantizationRecord)):
                prec = Float32QuantizationRecord(in_qs=head.in_qs,
                                                 out_qs=tail.out_qs)
            for node in pnode.contained_nodes():
                G.quantization.move_to_fusion(node, pnode)
            G.quantization[NodeId(pnode)] = prec
    return pnode, None, None
def replace_function(self, G: GraphView, subgraph: GraphView):
    """Collapse a relu/constant/mul subgraph into a single HSigmoid node."""
    relu = None
    constant = None
    mul = None
    for node in subgraph.nodes():
        if isinstance(node, ReluActivationParameters):
            relu = node
        elif isinstance(node, ConstantInputParameters):
            constant = node
        elif isinstance(node, MatrixMulParameters):
            mul = node
    activation = HSigmoidActivationParameters(
        mul.name + "_fused_close_hsigmoid", offset=0)
    if G.quantization:
        relu_qrec = G.quantization[NodeId(relu)]
        mul_qrec = G.quantization[NodeId(mul)]
        # the constant feeding the mul disappears with the fusion
        del G.quantization[NodeId(constant)]
        if isinstance(relu_qrec, SymmetricQuantizationRecord):
            fused_qrec = SymmetricQuantizationRecord(in_qs=relu_qrec.in_qs,
                                                     out_qs=mul_qrec.out_qs)
        elif isinstance(relu_qrec, MultQuantizationRecord):
            fused_qrec = MultQuantizationRecord(in_qs=relu_qrec.in_qs,
                                                out_qs=mul_qrec.out_qs)
        elif isinstance(relu_qrec, Float32QuantizationRecord):
            fused_qrec = Float32QuantizationRecord(in_qs=relu_qrec.in_qs,
                                                   out_qs=mul_qrec.out_qs)
        else:
            raise NotImplementedError()
        G.quantization[NodeId(activation)] = fused_qrec
    return activation, None, None
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Bilinear image resize in float32 (works in HWC layout internally).

    For each output pixel the four surrounding source pixels are blended
    with weights derived from the fractional source coordinate.
    """
    if qrec is None:
        qrec = Float32QuantizationRecord()
    src = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    src = src.transpose(params.in_dims[0].transpose_to_order(("h", "w", "c")))
    out_dim = params.out_dims[0]
    in_dim = params.in_dims[0]
    wstep = (in_dim.w - 1) / out_dim.w
    hstep = (in_dim.h - 1) / out_dim.h
    dest = np.empty((out_dim.h, out_dim.w, out_dim.c))
    for i in range(out_dim.h):
        y_l, y_h = math.floor(hstep * i), math.ceil(hstep * i)
        hc = (hstep * i) - y_l
        for j in range(out_dim.w):
            x_l, x_h = math.floor(wstep * j), math.ceil(wstep * j)
            wc = (wstep * j) - x_l
            # four corners: top-left, top-right, bottom-left, bottom-right
            tl = src[y_l, x_l, :]
            tr = src[y_l, x_h, :]
            bl = src[y_h, x_l, :]
            br = src[y_h, x_h, :]
            dest[i, j, :] = tl * (1 - wc) * (1 - hc) \
                + tr * wc * (1 - hc) \
                + bl * (1 - wc) * hc \
                + br * wc * hc
    dest = dest.transpose(out_dim.transpose_from_order(("h", "w", "c")))
    return qrec.get_outputs(params, [dest], ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Evaluate the node's expression collection in float32.

    When kwargs['details'] is supplied, symbol range statistics and
    per-symbol results are collected into it.
    """
    if qrec is None:
        qrec = Float32QuantizationRecord()
    details = kwargs.get('details')
    if details is None:
        stats = None
        results = None
    else:
        stats = SymbolStats()
        Symbol.set_default_control(stats)
        results = {}
    in_tensors = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    in_vars = {params.input_symbols[idx]: tens
               for idx, tens in enumerate(in_tensors)}
    out_vars = params.func_col(**in_vars,
                               calculate_ranges=stats is not None,
                               track_results=results)
    out_tensors = [out_vars[name] for name in params.output_symbols]
    if stats:
        details.update(stats.stats)
        details['results'] = results
    return qrec.get_outputs(params, out_tensors, ktype="float32")
def replace_function(self, G: NNGraph, subgraph: GraphView):
    """Wrap a two-node op+activation subgraph in an ActivationFusion node."""
    nodes = list(subgraph.nodes())
    pnode = ActivationFusion(nodes[0].name + "fusion",
                             nodes[0].op_name + "_active", subgraph)
    nodes[0].step_idx = 0
    nodes[1].step_idx = 1
    LOG.debug("fused nodes %s", ",".join(node.name for node in nodes))
    if G.quantization:
        qrecs = G.quantization.get_all(subgraph.nodes())
        if qrecs:
            head, tail = qrecs[0], qrecs[-1]
            if isinstance(head, (SymmetricQuantizationRecord,
                                 SymmetricScalableFilterQuantizationRecord)):
                prec = SymmetricQuantizationRecord(in_qs=head.in_qs,
                                                   out_qs=tail.out_qs)
            elif isinstance(head, (MultQuantizationRecord,
                                   MultScalableFilterQuantizationRecord)):
                prec = MultQuantizationRecord(in_qs=head.in_qs,
                                              out_qs=tail.out_qs)
            elif isinstance(head, (Float32QuantizationRecord,
                                   Float32ScalableFilterQuantizationRecord)):
                prec = Float32QuantizationRecord(in_qs=head.in_qs,
                                                 out_qs=tail.out_qs)
            for node in subgraph.nodes():
                G.quantization.move_to_fusion(node, pnode)
            G.quantization[NodeId(pnode)] = prec
    return pnode
def sigmoid(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Float32 logistic sigmoid: 1 / (1 + e^-x)."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    result = 1 / (1 + np.exp(-x))
    return qrec.get_outputs(params, [result], ktype="float32")
def cast(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Float32 cast is a pass-through: the prepared input is returned as-is."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    return qrec.get_outputs(params, [tensor], ktype="float32")
def replace_function(self, G: GraphView, subgraph: GraphView):
    """Fuse a validated conv-led subgraph into one ConvFusionParameters node.

    Raises DontReplaceError when the match fails validation.
    """
    if not self.validate_match(subgraph):
        raise DontReplaceError()
    # sequential step indices; nodes() is not necessarily in order
    for step, node in enumerate(subgraph.nodes()):
        node.step_idx = step
        if isinstance(node, Conv2DParameters):
            conv_name = node.name + "_fusion"
            break
    LOG.debug("fused nodes %s",
              ",".join(node.name for node in subgraph.nodes()))
    pnode = ConvFusionParameters(conv_name, fusion_type=self.fusion_type,
                                 subgraph=subgraph)
    if G.quantization:
        qrecs = G.quantization.get_all(pnode.contained_nodes())
        if qrecs:
            head, tail = qrecs[0], qrecs[-1]
            if isinstance(head, (SymmetricQuantizationRecord,
                                 SymmetricScalableFilterQuantizationRecord)):
                prec = SymmetricQuantizationRecord(in_qs=head.in_qs,
                                                   out_qs=tail.out_qs)
            elif isinstance(head, (MultQuantizationRecord,
                                   MultScalableFilterQuantizationRecord)):
                prec = MultQuantizationRecord(in_qs=head.in_qs,
                                              out_qs=tail.out_qs)
            elif isinstance(head, (Float32QuantizationRecord,
                                   Float32ScalableFilterQuantizationRecord)):
                prec = Float32QuantizationRecord(in_qs=head.in_qs,
                                                 out_qs=tail.out_qs)
            for node in pnode.contained_nodes():
                G.quantization.move_to_fusion(node, pnode)
            G.quantization[NodeId(pnode)] = prec
    return pnode, None, None
def match(self, G: GraphView, set_identity: bool = True):
    """Find conv/activation/pool chains in G and replace each with a fused node.

    Returns True when at least one fusion was performed.  When set_identity
    is True the matcher marks the graph via self.set_identity at the end.
    """
    has_modified_graph = False
    for conv_node in [params for params in G.nodes() if isinstance(params, Conv2DParameters)]:
        node_list = self.get_node_list(G, conv_node)
        # need at least two nodes in the chain to have anything to fuse
        if node_list is None or len(node_list.order) < 2:
            continue
        if node_list.fusion_type == 'conv_active_pool':
            # average pool after activation is dropped from the fusion:
            # keep only conv+active
            if node_list.pool.pool_type == "average":
                node_list.order = node_list.order[:2:]
                node_list.pool = None
        elif node_list.fusion_type == 'conv_pool_active':
            # average pool followed by a non-relu activation is not fused
            if node_list.pool.pool_type == "average" and node_list.active.activation != "relu":
                continue
        LOG.info("fusing nodes %s", ",".join((node.name for node in node_list.order)))
        has_modified_graph = True
        # build the internal subgraph as a simple chain in fusion order
        subgraph = GraphView()
        last_node = None
        for node in node_list.order:
            if last_node is not None:
                subgraph.add_edge(NNEdge(from_node=last_node, to_node=node))
            last_node = node
        # conv takes 3 inputs (weights/biases included); output is the chain tail
        input_mapping = [[(node_list.conv, idx)] for idx in range(3)]
        output_mapping = [(last_node, 0)]
        pnode = ConvFusionParameters(
            node_list.conv.name + '_fusion',
            fusion_type=node_list.fusion_type,
            subgraph=subgraph,
            in_dims_hint=node_list.conv.in_dims_hint,
            out_dims_hint=node_list.conv.out_dims_hint,
            input_mapping=input_mapping,
            output_mapping=output_mapping)
    if G.quantization:
        qrecs = G.quantization.get_all(pnode.contained_nodes())
        if qrecs:
            prec = None
            # fused record spans first node's inputs to last node's outputs
            if isinstance(qrecs[0], (SymmetricQuantizationRecord, SymmetricScalableFilterQuantizationRecord)):
                prec = SymmetricQuantizationRecord(in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
            elif isinstance(qrecs[0], (MultQuantizationRecord, MultScalableFilterQuantizationRecord)):
                prec = MultQuantizationRecord(in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
            elif isinstance(qrecs[0], (Float32QuantizationRecord, Float32ScalableFilterQuantizationRecord)):
                prec = Float32QuantizationRecord(in_qs=qrecs[0].in_qs, out_qs=qrecs[-1].out_qs)
            for node in pnode.contained_nodes():
                G.quantization.move_to_fusion(node, pnode)
            G.quantization[NodeId(pnode)] = prec
        # capture edges before removing the fused nodes, then rewire the
        # fused node into the surrounding graph in their place
        in_edges = G.in_edges(node_list.conv.name)
        out_edges = G.out_edges(last_node.name)
        for node in node_list.order:
            G.remove(node)
        for edge in in_edges:
            G.add_edge(NNEdge(edge.from_node, pnode, from_idx=edge.from_idx, to_idx=edge.to_idx))
        for edge in out_edges:
            G.add_edge(NNEdge(pnode, edge.to_node, from_idx=edge.from_idx, to_idx=edge.to_idx))
    if set_identity:
        self.set_identity(G)
    return has_modified_graph
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Run a recurrent kernel (step_kernel) over all cells in float32.

    Inputs after the first are mapped to params.INPUT_NAMES; state tensors
    are copied when always_reset_state is set so graph-held state is not
    mutated.  When a 'details' dict is passed via kwargs, per-step state
    (and, for LSTMs, cell) min/max ranges are accumulated into it.

    Fix: the original read kwargs['details'] and raised KeyError when
    callers omitted it, although the body already handles details being
    None (and the sibling expression kernel uses kwargs.get('details')).
    """
    if qrec is None:
        qrec = Float32QuantizationRecord()
    # .get so callers that pass no details dict still work
    details = kwargs.get('details')
    in_tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    args = {
        params.INPUT_NAMES[idx]: in_tensors[idx]
        for idx in range(1, len(in_tensors))
    }
    if params.always_reset_state:
        # copy so that the graph-held state tensors are not modified
        # state will be reset to zero in the next cycle
        for state_key in params.STATE_PARAMETERS:
            args[state_key] = args[state_key].copy()
    assert in_tensor.shape[
        0] == params.n_input_cells, "input shape incorrect - n_input_cells"
    assert in_tensor.shape[
        1] == params.n_inputs, "input shape incorrect - n_inputs"
    if params.revert:
        in_tensor = np.flip(in_tensor, axis=0)
    out_tensor = np.zeros([params.n_output_cells, params.n_states])
    out_idx = 0
    if details is not None:
        details['range_state'] = {
            'min': float('inf'), 'max': float('-inf')
        }
        if isinstance(params, LSTMParameters):
            details['range_cell'] = {
                'min': float('inf'), 'max': float('-inf')
            }
    for idx in range(params.n_cells):
        res = cls.step_kernel(params, args, idx, in_tensor, details=details)
        # only the last n_output_cells results are emitted
        if idx >= (params.n_cells - params.n_output_cells):
            out_tensor[out_idx] = res
            out_idx += 1
        if details is not None:
            details['range_state']['min'] = min(
                details['range_state']['min'], res.min())
            details['range_state']['max'] = max(
                details['range_state']['max'], res.max())
            if isinstance(params, LSTMParameters):
                details['range_cell']['min'] = min(
                    details['range_cell']['min'], args['c_state'].min())
                details['range_cell']['max'] = max(
                    details['range_cell']['max'], args['c_state'].max())
    if params.revert:
        out_tensor = np.flip(out_tensor, axis=0)
    if params.output_directions:
        out_tensor = np.expand_dims(out_tensor, 0)
    return [out_tensor]
def leaky(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Float32 leaky ReLU: x where x > 0, leak_factor * x where x < 0."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    result = x * (x > 0) + x * params.leak_factor * (x < 0)
    return qrec.get_outputs(params, [result], ktype="float32")
def hsigmoid(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Float32 hard sigmoid: clamp(x + offset, 0, 6) / 6."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    clamped = np.minimum(np.maximum(x + params.offset, 0), 6)
    return qrec.get_outputs(params, [clamped / 6], ktype="float32")
def execute_piecewise(cls, params, in_tensors, qrec: QuantizationRecordBase,
                      op, **kwargs):
    """Apply the binary elementwise `op` to the first two prepared inputs."""
    del kwargs
    if qrec is None:
        qrec = Float32QuantizationRecord()
    prepared = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    result = op(prepared[0], prepared[1])
    return qrec.get_outputs(params, [result], ktype="float32")
def transpose(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Float32 transpose using params.transpose_in[0] when configured."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    if params.transpose_in:
        tensor = np.transpose(tensor, params.transpose_in[0])
    return qrec.get_outputs(params, [tensor], ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Float32 logistic sigmoid kernel: 1 / (1 + e^-x)."""
    if qrec is None:
        qrec = Float32QuantizationRecord()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    sig = 1 / (1 + np.exp(-x))
    return qrec.get_outputs(params, [sig], ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Delegate execution to params.execute on the prepared float32 inputs."""
    if qrec is None:
        qrec = Float32QuantizationRecord()
    prepared = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    return qrec.get_outputs(params, params.execute(prepared),
                            ktype="float32")
def constant_input(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Emit the node's stored constant value; incoming tensors are ignored."""
    del in_tensors, details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    return qrec.get_outputs(params, [params.value], ktype="float32")
def piecewise(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Look up the binary op for this parameter class and apply it elementwise."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    op = PIECEWISE_OPS[params.__class__]['op']
    prepared = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    return qrec.get_outputs(params, [op(prepared[0], prepared[1])],
                            ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Float32 leaky ReLU kernel: x where x > 0, leak_factor * x where x < 0."""
    if qrec is None:
        qrec = Float32QuantizationRecord()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    result = x * (x > 0) + x * params.leak_factor * (x < 0)
    return qrec.get_outputs(params, [result], ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Float32 softmax kernel; float overflow raises while it runs.

    The previous numpy error state is saved and put back afterwards.
    """
    if qrec is None:
        qrec = Float32QuantizationRecord()
    saved_state = np.seterr(over='raise')
    tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    tensor = softmax_func(tensor)
    np.seterr(**saved_state)
    return qrec.get_outputs(params, [tensor], ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Float32 hard sigmoid kernel: clamp(x + offset, 0, 6) / 6."""
    if qrec is None:
        qrec = Float32QuantizationRecord()
    x = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    clamped = np.minimum(np.maximum(x + params.offset, 0), 6)
    return qrec.get_outputs(params, [clamped / 6], ktype="float32")
def sum_execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Reduce-sum over params.axis, honouring params.keep_dims."""
    if qrec is None:
        qrec = Float32QuantizationRecord()
    tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    summed = np.sum(tensor, axis=tuple(params.axis),
                    keepdims=params.keep_dims)
    return qrec.get_outputs(params, [summed], ktype="float32")
def matscale(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Elementwise product of two (or, with three in_dims, three) inputs."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    tensors = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    product = tensors[0] * tensors[1]
    if len(params.in_dims) == 3:
        product = product * tensors[2]
    return qrec.get_outputs(params, [product], ktype="float32")
def strided_slice(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Apply the node's numpy slice specification to the prepared input."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    params = typing_cast(StridedSliceParameters, params)
    tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    return qrec.get_outputs(params, [params.numpy_slice(tensor)],
                            ktype="float32")
def execute(cls, params, in_tensors, qrec: QuantizationRecordBase, **kwargs):
    """Elementwise product kernel for two (or three) float32 inputs."""
    if qrec is None:
        qrec = Float32QuantizationRecord()
    tensors = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    product = tensors[0] * tensors[1]
    if len(params.in_dims) == 3:
        product = product * tensors[2]
    return qrec.get_outputs(params, [product], ktype="float32")
def concat(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Concatenate inputs along params.axis with optional per-input pre-transpose
    and an optional output transpose."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    tensors = qrec.prepare_inputs(params, in_tensors, ktype="float32")
    if params.transpose_in:
        # each entry may be None, meaning that input is left untouched
        tensors = [
            np.transpose(tens, params.transpose_in[idx])
            if params.transpose_in[idx] else tens
            for idx, tens in enumerate(tensors)
        ]
    result = np.concatenate(tensors, params.axis)
    if params.transpose_out:
        result = np.transpose(result, params.transpose_out[0])
    return qrec.get_outputs(params, [result], ktype="float32")
def sum_global_pool(params, in_tensors, qrec: QuantizationRecordBase, details=None):
    """Global sum pool: reduce-sum over params.axis, honouring keep_dims."""
    del details
    if qrec is None:
        qrec = Float32QuantizationRecord()
    tensor = qrec.prepare_inputs(params, in_tensors, ktype="float32")[0]
    summed = np.sum(tensor, axis=tuple(params.axis),
                    keepdims=params.keep_dims)
    return qrec.get_outputs(params, [summed], ktype="float32")