def replace_pattern(self, graph: Graph, match: Dict[str, Node]):
    log.debug('GemmToFullyConnected is triggered')
    gemm = match['gemm']
    A = gemm.in_node(0)
    B = gemm.in_node(1)
    B_consumers = graph.out_edges(B.node)
    C = gemm.in_node(2)

    if not (B.value is not None and
            C.value is not None and
            A.shape is not None and
            not gemm.transpose_a and
            (len(B_consumers) == 1 or not gemm.transpose_b)):
        log.warning('Cannot convert Gemm to FullyConnected')
        return

    if gemm.transpose_b:
        # B is already stored in the O x I layout that FullyConnected expects,
        # so only the flag has to be dropped.
        # B.value = B.value.transpose()
        # B.shape = np.array(B.value.shape, dtype=np.int64)
        gemm.transpose_b = 0
    else:
        B.value = B.value.transpose()
        B.shape = np.array(B.value.shape, dtype=np.int64)

    gemm['out-size'] = gemm.out_port(0).data.get_shape()[-1]
    gemm['type'] = 'FullyConnected'
    gemm['channel_dims'] = len(match['output'].shape) - 1
    gemm['bias_addable'] = True
    gemm['input_channel_dim'] = 1  # MatMul weights in IO
    gemm['output_channel_dim'] = 0
    gemm['layout'] = 'NCHW'

    gemm.in_port(1).bin = 'weights'

    # Route the bias input C through a separate Add node placed after the new
    # FullyConnected: former consumers of Gemm now consume the Add output.
    bias_node = Add(graph, {}).create_node()
    gemm.out_port(0).get_connection().set_source(bias_node.out_port(0))
    gemm.in_port(2).get_connection().set_destination(bias_node.in_port(1))
    gemm.out_port(0).connect(bias_node.in_port(0))

    assign_dims_to_weights(gemm.in_node(1), None, 1, 0, 2)
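# A minimal numeric sketch (not part of the pass above) of why the rewrite is sound,
# assuming plain Gemm semantics Y = A @ B + C with transpose_a == transpose_b == 0,
# and a FullyConnected layer that stores weights in O x I layout (y = x @ W.T + c).
# All names and shapes below are made up for illustration.
import numpy as np

A = np.random.rand(2, 4).astype(np.float32)   # input, shape [N, I]
B = np.random.rand(4, 3).astype(np.float32)   # Gemm weights, shape [I, O]
C = np.random.rand(3).astype(np.float32)      # bias, shape [O]

gemm_out = A @ B + C                          # original Gemm
W = B.T                                       # the transpose the pass applies to B.value
fc_out = A @ W.T + C                          # FullyConnected, then the new Add node
assert np.allclose(gemm_out, fc_out)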
def replace_sub_graph(self, graph: Graph, match: dict):
    # This replacer replaces the ImageScaler operation with a Mul -> Add sequence.
    # It also checks whether the weights and biases have any effect at all.
    op = match['op']

    # Check that weights and biases are not useless
    has_bias, has_weights = True, True
    if all([x == 1 for x in np.nditer(op.scale)]):
        has_weights = False
    if all([x == 0 for x in np.nditer(op.bias)]):
        has_bias = False

    assert len(op.in_ports()) == 1

    last_port = op.in_port(0).get_source()

    # Create Mul & Add nodes
    if has_weights:
        mul_weights = Const(graph, dict(value=op.scale, shape=op.scale.shape)).create_node()
        mul_op = Mul(graph, dict(name=op.id + '/mul_')).create_node()
        op.in_port(0).get_connection().set_destination(mul_op.in_port(0))
        mul_weights.out_port(0).connect(mul_op.in_port(1))
        last_port = mul_op.out_port(0)

    if has_bias:
        add_bias = Const(graph, dict(value=op.bias, shape=op.bias.shape)).create_node()
        add_op = Add(graph, dict(name=op.id + '/add_')).create_node()
        last_port.get_connection().set_destination(add_op.in_port(0))
        add_bias.out_port(0).connect(add_op.in_port(1))
        last_port = add_op.out_port(0)

    # Detach the original ImageScaler and reconnect its consumers to the tail
    # of the new sequence.
    op.in_port(0).disconnect()
    op.out_port(0).get_connection().set_source(last_port)
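# A minimal numeric sketch (not part of the replacer above) of the decomposition,
# assuming ONNX ImageScaler semantics y = scale * x + bias with a per-channel bias
# broadcast over an NCHW tensor. Values and shapes are made up for illustration.
import numpy as np

x = np.random.rand(1, 3, 4, 4).astype(np.float32)
scale = np.float32(2.0)
bias = np.array([0.1, 0.2, 0.3], dtype=np.float32).reshape(1, 3, 1, 1)

fused = scale * x + bias                            # ImageScaler in one op
decomposed = np.add(np.multiply(x, scale), bias)    # the Mul node, then the Add node
assert np.allclose(fused, decomposed)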
def replace_pattern(self, graph: Graph, match: dict):
    assert match['operator'].has('multiplication_transparent_ports')

    quantize = match['quantize']

    # This pass is applicable for binarization only. Other intX variants are not relevant.
    if quantize.levels != 2:
        return

    port = match['operator'].input_ports_with(match['quantized'])
    assert len(port) >= 1
    if len(port) > 1:
        log.debug('BinarizeWeightsM1P1 cannot apply the transformation for data {} because it is consumed'
                  ' more than once'.format(match['quantized'].name))
        return

    assert len(port) == 1
    port = port[0]
    applicable = [pair for pair in match['operator'].multiplication_transparent_ports if pair[0] == port]
    if len(applicable) == 0:
        return

    # Look at the 3rd and 4th inputs of Quantize -- they hold constants that should be passed through.
    # Assume that the constant that should be passed through is a scalar.
    output_low = quantize.in_node(3)
    output_high = quantize.in_node(4)
    assert len(output_low.out_nodes()) == 1
    assert len(output_high.out_nodes()) == 1

    # Both constants must have known values to be passed through.
    if not output_low.has_valid('value') or not output_high.has_valid('value'):
        return

    output_low = output_low.value
    output_high = output_high.value

    operator = match['operator']

    if np.all(np.isclose(output_low, 0)) and np.all(np.isclose(output_high, 1)):
        # The quantized range is [0, 1], while xnor-popcount computes over {-1, +1}.
        # Compensate with Add(per-output-channel sum of weights) followed by Mul(0.5).
        weights = operator.in_node(1).value
        reduction_indices = set(range(len(weights.shape))) - set([operator.output_feature_channel])
        weights_reduced = np.add.reduce(weights, axis=tuple(reduction_indices))
        weights_reduced = weights_reduced.reshape([len(weights_reduced), 1, 1])

        add_term = Const(graph, {'value': weights_reduced}).create_node()
        add = Add(graph, {}).create_node()
        add.in_port(1).connect(add_term.out_port(0))
        mul_term = Const(graph, {'value': np.array(0.5)}).create_node()
        mul = Mul(graph, {}).create_node()
        mul.in_port(1).connect(mul_term.out_port(0))
        add.out_port(0).connect(mul.in_port(0))
        operator.out_port(0).get_connection().set_source(mul.out_port(0))
        add.in_port(0).connect(operator.out_port(0))

        operator['pad_value'] = float(-1.0)
    elif np.all(np.isclose(output_low, -1)) and np.all(np.isclose(output_high, +1)):
        pass
    else:
        log.debug('ConvToBinaryConv: cannot apply the transformation because the input range is neither'
                  ' in [0, +1] nor in [-1, +1].')
        return

    operator['type'] = 'BinaryConvolution'
    operator['mode'] = 'xnor-popcount'
    operator['input'] = operator.in_node(0).shape[1]  # input channels (NCHW)
    # Weights are not bit-packed yet; there should be a separate transformation to do that.

    assert output_low.size == 1
    assert output_high.size == 1

    output_low = quantize.in_node(3)
    output_high = quantize.in_node(4)

    # Make sure that the low/high values are exactly 0/1.
    output_low.value = np.zeros(output_low.shape)
    output_high.value = np.ones(output_high.shape)
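# A minimal numeric sketch (my reading of the pass, not its code) of the Add/Mul
# correction built for the [0, 1] branch: if the quantized data x01 is in {0, 1}
# and xnor-popcount effectively computes on xpm = 2 * x01 - 1 in {-1, +1}, then for
# +/-1 weights conv(x01, w) == 0.5 * (conv(xpm, w) + sum(w)), which is exactly the
# Add(weights reduced over all axes but the output channel) followed by Mul(0.5).
import numpy as np

rng = np.random.default_rng(0)
x01 = rng.integers(0, 2, size=16).astype(np.float32)     # quantized data in {0, 1}
w = rng.choice([-1.0, 1.0], size=16).astype(np.float32)  # binary weights in {-1, +1}
xpm = 2 * x01 - 1                                        # the {-1, +1} view of x01

y_reference = np.dot(x01, w)                 # convolution reduced to a dot product
y_binary = 0.5 * (np.dot(xpm, w) + w.sum())  # BinaryConvolution + Add + Mul
assert np.isclose(y_reference, y_binary)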