def reverse_matmul(G: GraphView, params):
    # reverse the first two input edges
    in_edges = G.indexed_in_edges(params.name)
    for edge in in_edges[0:2]:
        G.remove_edge(edge)
    other_idx = 1
    for edge in in_edges[0:2]:
        G.add_edge(NNEdge(from_node=edge.from_node, to_node=params,
                          from_idx=edge.from_idx, to_idx=other_idx))
        other_idx = 1 - other_idx

    nid = NodeId(params)
    if G.quantization and nid in G.quantization:
        qrec = G.quantization[nid]
        # swap qrecs
        qrec.in_qs[0], qrec.in_qs[1] = qrec.in_qs[1], qrec.in_qs[0]

    # add transposes
    in_nodes = []
    for idx in range(2):
        tin_params = TransposeParameters(
            G.unique_name(f"{params.name}_tin{idx}"), transpose=(1, 0))
        in_nodes.append(tin_params)
        G.insert_node_before(tin_params, params, to_idx=idx, edge_class=NNEdge)
    tout_params = TransposeParameters(
        G.unique_name(f"{params.name}_tout"), transpose=(1, 0))
    G.insert_node_after(params, tout_params)
    return in_nodes, tout_params
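# A minimal, standalone numpy sketch (illustrative only, not part of the
# graph code) of the identity that makes reverse_matmul valid: swapping the
# two MatMul inputs and transposing both inputs and the output leaves the
# product unchanged, since (B.T @ A.T).T == A @ B.
import numpy as np

a = np.random.rand(3, 4)
b = np.random.rand(4, 5)
assert np.allclose((b.T @ a.T).T, a @ b)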
def _execute(self, node, G):
    LOGL("%s", str(self))
    params = TransposeParameters(G.unique_name(f'{node.name}'),
                                 transpose=self.transpose,
                                 block_search_up=self.block_search_up,
                                 block_search_down=self.block_search_down)
    G.insert_node_at_edge(params, self.edge, edge_class=NNEdge)
def _execute(self, node, G):
    info(f"{self}")
    direction = self.direction
    if self.reshape_from is not None:
        params = ReshapeParameters(G.unique_name(f'{node.name}_reshape'),
                                   old_shape=Dim.unnamed(self.reshape_from),
                                   shape=Dim.unnamed(self.reshape_to))
        self.do_insert(node, G, params, direction=direction)
        node = params
        direction = "out"
    params = TransposeParameters(G.unique_name(f'{node.name}_trans'),
                                 transpose=self.transpose)
    self.do_insert(node, G, params, direction=direction)
def apply_output_trans(self, G, node, trans: list, index=None):
    if index is None:
        start = 0
        end = len(node.out_dims)
    else:
        start = index
        end = index + 1
    for idx in range(start, end):
        params = TransposeParameters(
            G.unique_name(f"{node.name}_trans_out{idx}"), transpose=trans)
        G.insert_node_after(node, params, from_idx=idx, edge_class=NNEdge)
        if node.out_dims_hint:
            # the node's own hint becomes the inverse-transposed order so
            # that the inserted transpose restores the original order
            node.out_dims_hint[idx] = apply_transpose(
                node.out_dims_hint[idx], self.invert(trans))
        if G.quantization:
            G.quantization.copy_qrec(node, 'out', idx, params)
def apply_input_trans(self, G, node, trans: list, index=None):
    if index is None:
        start = 0
        end = len(node.in_dims)
    else:
        start = index
        end = index + 1
    for idx in range(start, end):
        params = TransposeParameters(
            G.unique_name(f"{node.name}_trans_in{idx}"), transpose=trans)
        G.insert_node_before(params, node, to_idx=idx, edge_class=NNEdge)
        if node.in_dims_hint:
            node.in_dims_hint[idx] = apply_transpose(node.in_dims_hint[idx],
                                                     trans)
        if G.quantization:
            G.quantization.copy_qrec(node, 'in', idx, params)
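# A standalone sketch of the hint bookkeeping used above. _apply_transpose
# and _invert are hypothetical stand-ins for the apply_transpose and
# self.invert helpers these methods call; the real implementations may
# differ. The point is only that applying a permutation and then its
# inverse recovers the original hint order.
def _apply_transpose(hint, trans):
    # reorder a dimension-name hint according to a transpose
    return [hint[i] for i in trans]

def _invert(trans):
    # inverse permutation of trans
    inv = [0] * len(trans)
    for new_pos, old_pos in enumerate(trans):
        inv[old_pos] = new_pos
    return tuple(inv)

assert _apply_transpose(['c', 'h', 'w'], (1, 2, 0)) == ['h', 'w', 'c']
assert _apply_transpose(
    _apply_transpose(['c', 'h', 'w'], (1, 2, 0)),
    _invert((1, 2, 0))) == ['c', 'h', 'w']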
def match(self, G: GraphView, set_identity: bool = True):
    # get a list of all the nodes that are transposable but not transposes
    # Need to do this first to avoid mutating it when doing the modifications
    tnodes = list(filter(lambda n: isinstance(n, Transposable) and
                         not isinstance(n, TransposeParameters),
                         G.nodes()))
    for node in tnodes:
        if node.transpose_in:
            for idx, edge in enumerate(G.in_edges(node.name)):
                in_params = TransposeParameters("%s_TIN_%s" % (node.name, idx),
                                                transpose=node.transpose_in)
                if node.in_dims_hint:
                    in_hint = node.in_dims_hint[edge.to_idx]
                    out_hint = apply_reverse_transpose_to_hint(
                        in_hint, node.transpose_in)
                    in_params.in_dims_hint = [in_hint.copy()]
                    in_params.out_dims_hint = [out_hint.copy()]
                    node.in_dims_hint[edge.to_idx] = out_hint
                if G.quantization:
                    G.quantization.copy_to_node(node, in_params)
                G.insert_node(in_params, edge.from_node.name,
                              edge.to_node.name, from_idx=edge.from_idx,
                              to_idx=edge.to_idx)
            node.transpose_in = None
        if node.transpose_out:
            for idx, edge in enumerate(G.out_edges(node.name)):
                out_params = TransposeParameters("%s_TOUT_%s" % (node.name, idx),
                                                 transpose=node.transpose_out)
                if node.out_dims_hint:
                    out_hint = node.out_dims_hint[edge.from_idx]
                    in_hint = apply_reverse_transpose_to_hint(
                        out_hint, node.transpose_out)
                    out_params.in_dims_hint = [in_hint.copy()]
                    out_params.out_dims_hint = [out_hint.copy()]
                    node.out_dims_hint[edge.from_idx] = in_hint
                if G.quantization:
                    G.quantization.copy_to_node(node, out_params)
                G.insert_node(out_params, edge.from_node.name,
                              edge.to_node.name, from_idx=edge.from_idx,
                              to_idx=edge.to_idx)
            node.transpose_out = None
    if set_identity:
        self.set_identity(G)
def _handle(cls, node, quantized=False, **kwargs):
    all_nodes = kwargs['all_nodes']
    G = kwargs['G']
    valid_name = kwargs['valid_name']
    inputs = [all_nodes[inp] for inp in node.input]
    x = inputs[0]
    x_shape = cls._get_real_dim(x[2].shape)
    y_idx = 3 if quantized else 1
    y = inputs[y_idx]
    y_shape = cls._get_real_dim(y[2].shape)

    if quantized:
        qrecs = kwargs['qrecs']
        x_zp = cls.get_constant(inputs[2])
        x_scale = cls.get_constant(inputs[1])
        if len(x_scale) > 1:
            raise NotImplementedError('QMatMul scales must be scalar')
        x_qtype = QType(dtype=x_zp.dtype, scale=x_scale, zero_point=x_zp)
        y_zp = cls.get_constant(inputs[5])
        y_scale = cls.get_constant(inputs[4])
        if len(y_scale) > 1:
            raise NotImplementedError('QMatMul scales must be scalar')
        y_qtype = QType(dtype=y_zp.dtype, scale=y_scale, zero_point=y_zp)
        o_zp = cls.get_constant(inputs[7])
        o_scale = cls.get_constant(inputs[6])
        o_qtype = QType(dtype=o_zp.dtype, scale=o_scale, zero_point=o_zp)
    else:
        o_qtype = None

    if cls.is_linear(y, x_shape, y_shape):
        # constant 2D second input: convert to a fully connected layer
        filt_dim = FcFilterDim(y_shape[1], x_shape[0])
        weights = np.transpose(cls.get_constant(y), [1, 0])
        weights_params = ConstantInputParameters(
            f'{valid_name}_weights',
            dims=Dim.unnamed([y_shape[1], x_shape[0]]),
            value=weights)
        cls.record_constant_qrec(y, weights_params, **kwargs)
        params = FcParameters(
            valid_name,
            filt=filt_dim,
            has_bias=True,
            in_dims_hint=[None, ['out_c', 'in_c'], ['out_c']],
            out_dims_hint=[['c']])
        out_dims = params.get_output_size([Dim.unnamed(x_shape)])
        biases_params = ConstantInputParameters(
            f'{valid_name}_biases',
            dims=Dim.unnamed([y_shape[1]]),
            value=np.zeros((y_shape[1],), dtype=np.float32))
        G.add_edge(NNEdge(from_node=weights_params, to_node=params, to_idx=1))
        G.add_edge(NNEdge(from_node=biases_params, to_node=params, to_idx=2))
        if quantized:
            weights_params.qtype = y_qtype
            qrecs[NodeId(params)] = QRec.scaled(
                in_qs=[x_qtype, y_qtype, None],
                out_qs=[o_qtype],
            )
    else:
        params = MatMulTransposedParameters(valid_name)
        # swap the last two axes so the second input arrives transposed
        trans_shape = list(range(len(y_shape)))
        trans_shape[-1], trans_shape[-2] = trans_shape[-2], trans_shape[-1]
        trans2 = TransposeParameters(f'{valid_name}_tin2',
                                     transpose=tuple(trans_shape))
        out_dims = params.get_output_size([
            Dim.unnamed(x_shape),
            Dim.unnamed(y_shape[:-2] + y_shape[-2:][::-1])
        ])
        G.add_edge(NNEdge(from_node=y[0], to_node=trans2,
                          from_idx=y[1], to_idx=0))
        G.add_edge(NNEdge(from_node=trans2, to_node=params,
                          from_idx=0, to_idx=1))
        biases_params = ConstantInputParameters(
            f'{valid_name}_biases',
            dims=Dim.unnamed([out_dims[0].shape[1]]),
            value=np.zeros((out_dims[0].shape[1],), dtype=np.float32))
        G.add_edge(NNEdge(from_node=biases_params, to_node=params, to_idx=2))
        if quantized:
            qrecs[NodeId(trans2)] = QRec.scaled(
                in_qs=[y_qtype],
                out_qs=[y_qtype],
            )
            qrecs[NodeId(params)] = QRec.scaled(
                in_qs=[x_qtype, y_qtype],
                out_qs=[o_qtype],
            )

    G.add_edge(NNEdge(from_node=x[0], to_node=params,
                      from_idx=x[1], to_idx=0))
    pout_dims = x[2].infer_mapping(out_dims[0].shape)
    all_nodes[node.output[0]] = (params, 0, pout_dims, o_qtype)
    return params
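# A standalone numpy sketch of the linear-case rewrite above: when the
# second MatMul input is a constant 2D tensor, x @ y computes the same
# thing as a fully connected layer whose weights are y transposed into
# (out_c, in_c) layout, plus a zero bias. Illustrative only; the FC is
# written out directly in numpy.
import numpy as np

x = np.random.rand(8)           # (in_c,)
y = np.random.rand(8, 4)        # (in_c, out_c)
w = y.T                         # FC weight layout (out_c, in_c)
b = np.zeros(4, dtype=y.dtype)  # zero bias, as created above
assert np.allclose(x @ y, w @ x + b)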
def attach_rnn(G, x, rnn_params_class, extra_args, valid_name, tensors,
               used_tensors, hidden_size, input_size, all_nodes, node,
               seq_len, num_directions):
    # check which outputs are used
    used_outputs = tuple(outp in used_tensors for outp in node.output)
    if len(used_outputs) == 3 and used_outputs[2]:
        raise ValueError("outputting the cell state of an LSTM is not supported")
    # if the full state output is used we need to output all cells
    n_output_cells = seq_len if used_outputs[0] else 1

    both_dir = []
    for i in range(num_directions):
        node_name = valid_name if i == 0 else valid_name + '_rev'
        rnn_params = rnn_params_class(node_name, n_cells=seq_len,
                                      n_states=hidden_size,
                                      n_inputs=input_size,
                                      n_input_cells=seq_len,
                                      n_output_cells=n_output_cells,
                                      output_directions=True,
                                      revert=(i == 1), **extra_args)
        both_dir.append(rnn_params)
        t = tensors['forward' if i == 0 else 'backward']
        for idx, name in enumerate(rnn_params.INPUT_NAMES):
            if name == 'input':
                G.add_edge(NNEdge(from_node=x[0], to_node=rnn_params,
                                  from_idx=x[1], to_idx=0))
                continue
            if name not in t:
                continue
            cparams = ConstantInputParameters("%s_%s" % (node_name, name),
                                              value=t[name],
                                              dims=Dim.unnamed(t[name].shape))
            G.add_edge(NNEdge(from_node=cparams, to_node=rnn_params,
                              from_idx=0, to_idx=idx))

        # Link the state weights to the input weights.
        # The autotiler expects the state and input weights to be
        # concatenated. This tells the constant code generator to do this.
        rnn_in_edges = [in_edge for in_edge in G.in_edges(rnn_params.name)]
        in_nodes = {}
        for edge in rnn_in_edges:
            in_nodes[edge.to_idx] = edge.from_node
        if isinstance(rnn_params, LSTMParameters):
            for gate in ['i', 'o', 'c', 'f']:
                i_w_node = in_nodes[LSTMParameters.INPUT_NAMES.index('i_2_%s_w' % gate)]
                r_w_node = in_nodes[LSTMParameters.INPUT_NAMES.index('r_2_%s_w' % gate)]
                r_w_node.concated_nodes.append(i_w_node)
                i_w_node.generate_value = False
        elif isinstance(rnn_params, GRUParameters):
            for gate in ['r', 'z', 'h']:
                i_w_node = in_nodes[GRUParameters.INPUT_NAMES.index('w_2_%s_w' % gate)]
                r_w_node = in_nodes[GRUParameters.INPUT_NAMES.index('r_2_%s_w' % gate)]
                r_w_node.concated_nodes.append(i_w_node)
                i_w_node.generate_value = False
        elif isinstance(rnn_params, RNNParameters):
            for gate in ['i']:
                i_w_node = in_nodes[RNNParameters.INPUT_NAMES.index('i_2_%s_w' % gate)]
                r_w_node = in_nodes[RNNParameters.INPUT_NAMES.index('r_2_%s_w' % gate)]
                r_w_node.concated_nodes.append(i_w_node)
                i_w_node.generate_value = False

        # trim batch dimension from state values
        for state_node_name in ['i_state', 'c_state', 'h_state']:
            if state_node_name not in rnn_params.INPUT_NAMES:
                continue
            state_node = in_nodes[rnn_params.INPUT_NAMES.index(state_node_name)]
            # set by default as allocated
            state_node.at_options.allocate = True
            state_node.is_constant = False
            # reset state after each invocation
            state_node.always_copy = True
            # add a single reset
            state_node.reset_name = "Reset"

    out_idx = 0 if used_outputs[0] else 1
    if num_directions > 1:
        # if it is bidir then we need a concat
        concat_params = ConcatParameters(valid_name + '_bidir', axis=0)
        for idx in range(num_directions):
            G.add_edge(NNEdge(from_node=both_dir[idx], to_node=concat_params,
                              from_idx=out_idx, to_idx=idx))
        out_edge = (concat_params, 0)
    else:
        out_edge = (both_dir[0], out_idx)

    if out_idx == 0:
        # if output 0 is used then the expected dims are (steps, dirs, hidden_size)
        trans_params = TransposeParameters(valid_name + '_trans',
                                           transpose=(1, 0, 2))
        G.add_edge(NNEdge(out_edge[0], trans_params, from_idx=out_edge[1]))
        out_edge = (trans_params, 0)

    if used_outputs[0] and used_outputs[1]:
        raise ValueError('recurrent network with both last output and '
                         'all states output is not supported')
    if used_outputs[0]:
        all_nodes[node.output[0]] = (out_edge[0], out_edge[1],
                                     ProvisionalDim((n_output_cells,
                                                     num_directions, None,
                                                     hidden_size)))
        out_edge[0].meta['onnx_output'] = [node.output[0]]
    else:
        all_nodes[node.output[1]] = (out_edge[0], out_edge[1],
                                     ProvisionalDim((num_directions, None,
                                                     hidden_size)))
        out_edge[0].meta['onnx_output'] = [node.output[1]]
    return out_edge[0]
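# A standalone numpy sketch of the output-layout fixup above, assuming the
# per-direction outputs stack to (dirs, steps, hidden) after the axis-0
# concat. ONNX expects the all-states output as (steps, dirs, hidden) once
# the batch dimension is trimmed, hence the (1, 0, 2) transpose.
# Illustrative only.
import numpy as np

steps, dirs, hidden = 5, 2, 3
stacked = np.random.rand(dirs, steps, hidden)  # concat of both directions
onnx_layout = stacked.transpose(1, 0, 2)       # -> (steps, dirs, hidden)
assert onnx_layout.shape == (steps, dirs, hidden)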
def attach_rnn(G, x, rnn_params_class, extra_args, valid_name, tensors,
               used_tensors, hidden_size, input_size, all_nodes, node,
               seq_len, num_directions):
    # check which outputs are used
    used_outputs = tuple(outp in used_tensors for outp in node.output)
    output_cell = len(used_outputs) == 3 and used_outputs[2]
    # if the full state output is used we need to output all cells
    n_output_cells = seq_len if used_outputs[0] else 1

    both_dir = []
    for i in range(num_directions):
        node_name = valid_name if i == 0 else valid_name + '_rev'
        rnn_params = rnn_params_class(node_name, n_cells=seq_len,
                                      n_states=hidden_size,
                                      n_inputs=input_size,
                                      n_input_cells=seq_len,
                                      n_output_cells=n_output_cells,
                                      output_directions=True,
                                      revert=(i == 1), **extra_args)
        if output_cell:
            rnn_params.lstm_output_c_state = True
        both_dir.append(rnn_params)
        t = tensors['forward' if i == 0 else 'backward']
        for idx, name in enumerate(rnn_params.INPUT_NAMES):
            if name == 'input':
                G.add_edge(NNEdge(from_node=x[0], to_node=rnn_params,
                                  from_idx=x[1], to_idx=0))
                continue
            if name not in t:
                continue
            if isinstance(t[name], tuple):
                cparams, cidx = t[name]
            else:
                cparams = ConstantInputParameters(
                    "%s_%s" % (node_name, name), value=t[name],
                    dims=Dim.unnamed(t[name].shape))
                cidx = 0
            G.add_edge(NNEdge(from_node=cparams, to_node=rnn_params,
                              from_idx=cidx, to_idx=idx))

        # TODO - Move to the quantizer (and make a qtype attr) since this
        # depends on the kernel used
        # Link the state weights to the input weights.
        # The autotiler expects the state and input weights to be
        # concatenated. This tells the constant code generator to do this.
        rnn_in_edges = [in_edge for in_edge in G.in_edges(rnn_params.name)]
        in_nodes = {}
        for edge in rnn_in_edges:
            in_nodes[edge.to_idx] = edge.from_node

        # trim batch dimension from state values
        for state_node_name in ['i_state', 'c_state', 'h_state']:
            if state_node_name not in rnn_params.INPUT_NAMES:
                continue
            state_node = in_nodes[rnn_params.INPUT_NAMES.index(state_node_name)]
            # set by default as allocated
            state_node.at_options.allocate = True
            state_node.is_constant = False
            # reset state after each invocation
            state_node.always_copy = True
            # add a single reset
            state_node.reset_name = "Reset"

    out_idx = 0 if used_outputs[0] else 1
    if num_directions > 1:
        # if it is bidir then we need a concat
        concat_params = ConcatParameters(G.unique_name(valid_name + '_bidir'),
                                         axis=0)
        if output_cell:
            output_cell = (ConcatParameters(
                G.unique_name(valid_name + '_cstate_bidir'), axis=0), 0)
        for idx in range(num_directions):
            G.add_edge(NNEdge(from_node=both_dir[idx], to_node=concat_params,
                              from_idx=out_idx, to_idx=idx))
            if output_cell:
                G.add_edge(NNEdge(from_node=both_dir[idx],
                                  to_node=output_cell[0],
                                  from_idx=2, to_idx=idx))
        out_edge = (concat_params, 0)
    else:
        out_edge = (both_dir[0], out_idx)
        if output_cell:
            output_cell = (both_dir[0], 1)

    if out_idx == 0:
        # if output 0 is used then the expected dims are (steps, dirs, hidden_size)
        trans_params = TransposeParameters(G.unique_name(f'{valid_name}_trans'),
                                           transpose=(1, 0, 2))
        G.add_edge(NNEdge(out_edge[0], trans_params, from_idx=out_edge[1]))
        out_edge = (trans_params, 0)

    if used_outputs[0]:
        all_nodes[node.output[0]] = (out_edge[0], out_edge[1],
                                     ProvisionalDim((n_output_cells,
                                                     num_directions, None,
                                                     hidden_size)), None)
        out_edge[0].meta['onnx_output'] = [node.output[0]]
        if used_outputs[1]:
            # the last state is the final cell of the all-states output
            sslice = StridedSliceParameters(
                G.unique_name(f'{valid_name}_split'),
                act_slice=((n_output_cells - 1, n_output_cells, 1),
                           (0, num_directions, 1),
                           (0, hidden_size, 1)),
                out_shape=(num_directions, hidden_size))
            G.add_edge(NNEdge(from_node=out_edge[0], from_idx=out_edge[1],
                              to_node=sslice))
            all_nodes[node.output[1]] = (sslice, 0,
                                         ProvisionalDim((num_directions, None,
                                                         hidden_size)), None)
    else:
        all_nodes[node.output[1]] = (out_edge[0], out_edge[1],
                                     ProvisionalDim((num_directions, None,
                                                     hidden_size)), None)
        out_edge[0].meta['onnx_output'] = [node.output[1]]
    if output_cell:
        all_nodes[node.output[2]] = (output_cell[0], output_cell[1],
                                     ProvisionalDim((num_directions, None,
                                                     hidden_size)), None)
    return out_edge[0]
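# A standalone numpy sketch of the strided slice above: when both the
# all-states output and the last-state output are requested, the last state
# is simply the final cell of the all-states tensor. Illustrative only;
# shapes assume the (steps, dirs, hidden) layout produced after the
# transpose.
import numpy as np

steps, dirs, hidden = 5, 2, 3
all_states = np.random.rand(steps, dirs, hidden)
last_state = all_states[steps - 1:steps, 0:dirs, 0:hidden].reshape(dirs, hidden)
assert np.allclose(last_state, all_states[-1])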
def attach_rnn(G, x, rnn_params_class, extra_args, valid_name, tensors,
               used_tensors, hidden_size, input_size, all_nodes, node,
               seq_len, num_directions):
    # check which outputs are used
    used_outputs = tuple(outp in used_tensors for outp in node.output)
    if len(used_outputs) == 3 and used_outputs[2]:
        raise ValueError(
            "outputting the cell state of an LSTM is not supported")
    # if the full state output is used we need to output all cells
    n_output_cells = seq_len if used_outputs[0] else 1

    both_dir = []
    for i in range(num_directions):
        node_name = valid_name if i == 0 else valid_name + '_rev'
        rnn_params = rnn_params_class(node_name, n_cells=seq_len,
                                      n_states=hidden_size,
                                      n_inputs=input_size,
                                      n_input_cells=seq_len,
                                      n_output_cells=n_output_cells,
                                      output_directions=True,
                                      revert=(i == 1), **extra_args)
        both_dir.append(rnn_params)
        t = tensors['forward' if i == 0 else 'backward']
        for idx, name in enumerate(rnn_params.INPUT_NAMES):
            if name == 'input':
                G.add_edge(NNEdge(from_node=x[0], to_node=rnn_params,
                                  from_idx=x[1], to_idx=0))
                continue
            if name not in t:
                continue
            cparams = ConstantInputParameters("%s_%s" % (node_name, name),
                                              value=t[name],
                                              dims=Dim.unnamed(t[name].shape))
            G.add_edge(NNEdge(from_node=cparams, to_node=rnn_params,
                              from_idx=0, to_idx=idx))

    out_idx = 0 if used_outputs[0] else 1
    if num_directions > 1:
        # if it is bidir then we need a concat
        concat_params = ConcatParameters(valid_name + '_bidir', axis=0)
        for idx in range(num_directions):
            G.add_edge(NNEdge(from_node=both_dir[idx], to_node=concat_params,
                              from_idx=out_idx, to_idx=idx))
        out_edge = (concat_params, 0)
    else:
        out_edge = (both_dir[0], out_idx)

    if out_idx == 0:
        # if output 0 is used then the expected dims are (steps, dirs, hidden_size)
        trans_params = TransposeParameters(valid_name + '_trans',
                                           transpose=(1, 0, 2))
        G.add_edge(NNEdge(out_edge[0], trans_params, from_idx=out_edge[1]))
        out_edge = (trans_params, 0)

    if used_outputs[0] and used_outputs[1]:
        raise ValueError(
            'recurrent network with both last output and all states output '
            'is not supported')
    if used_outputs[0]:
        all_nodes[node.output[0]] = (out_edge[0], out_edge[1],
                                     ProvisionalDim((n_output_cells,
                                                     num_directions, None,
                                                     hidden_size)))
        out_edge[0].meta['onnx_output'] = [node.output[0]]
    else:
        all_nodes[node.output[1]] = (out_edge[0], out_edge[1],
                                     ProvisionalDim((num_directions, None,
                                                     hidden_size)))
        out_edge[0].meta['onnx_output'] = [node.output[1]]
    return out_edge[0]