def check_init_states(graph: Graph, match: dict): """ Check if cell have initial states and create zeros states if not. """ rnn_layer = match['rnn_layer'] num_directions = 2 if rnn_layer.direction == 'bidirectional' else 1 batch_size = rnn_layer.in_node(0).shape[rnn_layer.batch_dim] h_init_port = 5 c_init_port = 6 if h_init_port not in rnn_layer.in_nodes(): h_shape = [num_directions, batch_size, rnn_layer.hidden_size] # from ONNX spec h_init = np.full(h_shape, 0, dtype=np.float32) Op.create_and_connect_input_data_node( graph, rnn_layer, {'value': h_init, 'shape': np.array(h_init.shape, dtype=np.int64)}, {'in': h_init_port, 'permutation': None} ) if rnn_layer.op == 'LSTM': if c_init_port not in rnn_layer.in_nodes(): c_shape = [num_directions, batch_size, rnn_layer.hidden_size] # from ONNX spec c_init = np.full(c_shape, 0, dtype=np.float32) Op.create_and_connect_input_data_node( graph, rnn_layer, {'value': c_init, 'shape': np.array(c_init.shape, dtype=np.int64)}, {'in': c_init_port, 'permutation': None} )
def repack_weights(self, graph: Graph, match: dict):
    # Concatenate W and R into the IE-compatible format.
    # Delete the useless num_dir and n_cells dimensions in W, R, B (peepholes?).
    lstm = match['rnn_layer']
    W, R, B = match['W'].value.copy(), match['R'].value.copy(), match['B'].value.copy()

    graph.remove_edge(match['W'].id, lstm.id)
    graph.remove_edge(match['R'].id, lstm.id)
    graph.remove_edge(match['B'].id, lstm.id)

    # Sum the components of B that correspond to W and R
    if lstm.op == 'GRU' and lstm.linear_before_reset:
        B_shape = np.array(B.shape)
        B_shape[3] = 4
        B_shape[2] = 1
        B_tmp = np.zeros(shape=B_shape)
        B_tmp[:, :, :, 0, :] = B[:, :, 0, 0, :] + B[:, :, 1, 0, :]
        B_tmp[:, :, :, 1, :] = B[:, :, 0, 1, :] + B[:, :, 1, 1, :]
        B_tmp[:, :, :, 2, :] = B[:, :, 0, 2, :][:, :, np.newaxis, :]
        B_tmp[:, :, :, 3, :] = B[:, :, 1, 2, :][:, :, np.newaxis, :]
        B = B_tmp
    else:
        B = np.add.reduce(B, axis=2, keepdims=True)

    # Concatenate W, R into the IE-compatible format
    assert len(W.shape) == 5
    assert len(R.shape) == 5
    WR = np.concatenate([W, R], axis=4)

    # Squeeze useless dimensions
    assert WR.shape[0] == 1  # num_dir == 1
    assert WR.shape[1] == 1  # num_cells == 1
    assert B.shape[0] == 1
    assert B.shape[1] == 1
    WR = WR.squeeze(axis=(0, 1))
    B = B.squeeze(axis=(0, 1))

    # Flatten all output (0, 1) and input dimensions (2, 3)
    final_shape_WR = [WR.shape[0] * WR.shape[1], -1]
    assert final_shape_WR[0] == lstm.hidden_size * lstm.multiplier
    WR = WR.reshape(final_shape_WR)

    final_shape_B = final_shape_WR.copy()  # copy to avoid mutating final_shape_WR
    if lstm.op == 'GRU' and lstm.linear_before_reset:
        final_shape_B[0] = lstm.hidden_size * 4
    B = B.reshape(final_shape_B)

    # Squeeze fake dimension in B
    B = B.squeeze(axis=-1)

    for blob, port, name in [(WR, 1, 'weights'), (B, 2, 'biases')]:
        Op.create_and_connect_input_data_node(
            graph,
            lstm,
            {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
            {'in': port, 'bin': name, 'permutation': None}
        )
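# Illustrative sketch (not part of the transformation above): in the non-GRU branch the two
# bias components stored on axis 2 of the general-format blob (one for W, one for R) are merged
# with np.add.reduce. The sizes below are hypothetical and only show the effect of that reduction.
def _demo_sum_bias_components():
    import numpy as np
    num_dir, num_cells, hidden_size, multiplier = 1, 1, 3, 4
    # B holds two components (W-related and R-related) on axis 2, as in the general format above
    B = np.random.rand(num_dir, num_cells, 2, multiplier, hidden_size).astype(np.float32)
    B_summed = np.add.reduce(B, axis=2, keepdims=True)
    assert B_summed.shape == (num_dir, num_cells, 1, multiplier, hidden_size)
    # each element is the sum of the corresponding W-bias and R-bias entries
    assert np.allclose(B_summed[:, :, 0], B[:, :, 0] + B[:, :, 1])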
def copy_input_blobs(op: Node, copy_op: Node):
    """
    Copy input blob data nodes from the restored graph to the copied one
    :param op: Node from the restored graph
    :param copy_op: Node from the copied graph
    :return:
    """
    for u, d in op.get_sorted_inputs():
        if 'bin' in d:
            Op.create_and_connect_input_data_node(
                copy_op.graph,
                copy_op,
                {'value': op.in_node(d['in']).value, 'shape': op.in_node(d['in']).shape},
                d
            )
def repack_weights(graph: Graph, match: dict):
    """
    Repack weights into the general format (described above) and reorder gates.
    """
    rnn_layer = match['rnn_layer']
    W = match['W'].value.copy()
    R = match['R'].value.copy()
    num_directions = 2 if rnn_layer.direction == 'bidirectional' else 1

    graph.remove_edge(match['W'].id, rnn_layer.id)
    graph.remove_edge(match['R'].id, rnn_layer.id)

    # find the optional 'B' biases blob
    if 3 in rnn_layer.in_nodes():
        # TODO: check if 'bin': 'B' attribute is assigned to this edge
        B = rnn_layer.in_node(3).value.copy()
        graph.remove_edge(rnn_layer.in_node(3).id, rnn_layer.id)
    else:
        B_shape = [num_directions, 2 * rnn_layer.multiplier * rnn_layer.hidden_size]  # from ONNX spec
        B = np.full(B_shape, 0, dtype=np.float32)

    # Add extra dimensions for W, R and B for easier repacking and reordering
    B = B.reshape([
        num_directions,  # 0: num of directions
        rnn_layer.num_layers,  # 1: num_layers
        2,  # 2: two input parts of the matrix: W, R
        rnn_layer.multiplier,  # 3: output parts of the matrix for all gates (e.g. i, o, f, c for LSTM)
        rnn_layer.hidden_size,  # 4: output size per direction and gate
    ])

    W, R = [x.reshape([
        num_directions,  # 0: num of directions
        rnn_layer.num_layers,  # 1: num_layers
        rnn_layer.multiplier,  # 2: output parts of the matrix for all gates (e.g. i, o, f, c for LSTM)
        rnn_layer.hidden_size,  # 3: output size per direction and gate
        -1])  # 4: input size/hidden size in W/R correspondingly
        for x in (W, R)]

    input_size = match['input'].shape[2]
    assert input_size == W.shape[-1]

    # Reorder gates (e.g. iofc --> fico for LSTM)
    gate_reorder = rnn_layer.gate_order
    W, R = (np.take(x, gate_reorder, axis=2) for x in (W, R))
    B = np.take(B, gate_reorder, axis=3)

    for blob, port in [(W, 1), (R, 2), (B, 3)]:
        Op.create_and_connect_input_data_node(
            graph,
            rnn_layer,
            {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
            {'in': port, 'permutation': None}
        )
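# Illustrative sketch (assumption: an LSTM with ONNX gate order i, o, f, c and target order
# f, i, c, o): np.take with a gate_order of [2, 0, 3, 1] along the gate axis performs the
# reordering used above. Sizes and the helper name are hypothetical.
def _demo_gate_reorder():
    import numpy as np
    hidden_size, input_size = 2, 3
    gate_order = [2, 0, 3, 1]  # iofc -> fico
    # one direction, one layer, 4 gates, hidden_size rows, input_size columns
    W = np.arange(1 * 1 * 4 * hidden_size * input_size).reshape(1, 1, 4, hidden_size, input_size)
    W_reordered = np.take(W, gate_order, axis=2)
    # gate 0 of the result is the former 'f' gate (index 2), gate 1 is the former 'i' gate (index 0)
    assert np.array_equal(W_reordered[0, 0, 0], W[0, 0, 2])
    assert np.array_equal(W_reordered[0, 0, 1], W[0, 0, 0])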
def check_init_states(graph: Graph, match: dict):
    """
    Check if the cell has initial states and create zero states if not.
    Also renumber the ports for these states.
    """
    rnn_cell = match['rnn_layer']
    num_directions = 2 if rnn_cell.direction == 'bidirectional' else 1
    batch_size = rnn_cell.in_node(0).shape[rnn_cell.batch_dim]

    h_init_port = 5
    c_init_port = 6

    if 2 not in rnn_cell.in_nodes():
        h_shape = [num_directions, batch_size, rnn_cell.hidden_size]  # from ONNX spec
        h_init = np.full(h_shape, 0, dtype=np.float32)
        Op.create_and_connect_input_data_node(
            graph,
            rnn_cell,
            {'value': h_init, 'shape': np.array(h_init.shape, dtype=np.int64)},
            {'in': h_init_port, 'permutation': None}
        )
    else:
        hidden_state_edge = graph.get_edge_data(rnn_cell.in_node(2).id, rnn_cell.id)
        hidden_state_edge[0]['in'] = h_init_port

    if rnn_cell.op == 'LSTM':
        if 3 not in rnn_cell.in_nodes():
            c_shape = [num_directions, batch_size, rnn_cell.hidden_size]  # from ONNX spec
            c_init = np.full(c_shape, 0, dtype=np.float32)
            Op.create_and_connect_input_data_node(
                graph,
                rnn_cell,
                {'value': c_init, 'shape': np.array(c_init.shape, dtype=np.int64)},
                {'in': c_init_port, 'permutation': None}
            )
        else:
            cell_state_edge = graph.get_edge_data(rnn_cell.in_node(3).id, rnn_cell.id)
            cell_state_edge[0]['in'] = c_init_port
def repack_weights(self, graph: nx.MultiDiGraph, match: dict):
    lstm = match['lstm']
    W = match['W'].value.copy()
    R = match['R'].value.copy()

    # the bidirectional case should be processed separately before this transformation
    if lstm.direction not in ['forward', 'reverse']:
        raise Error(
            'Only ONNX/LSTM operators with `forward` or `reverse` direction are supported. '
            'Node {} has direction = {} which is not supported.'.format(lstm.name, lstm.direction))

    graph.remove_edge(match['W'].id, lstm.id)
    graph.remove_edge(match['R'].id, lstm.id)

    # find the optional 'B' biases blob
    if 3 in lstm.in_nodes():
        # TODO: check if 'bin': 'B' attribute is assigned to this edge
        B = lstm.in_node(3).value.copy()
        graph.remove_edge(lstm.in_node(3).id, lstm.id)
    else:
        B = np.full([1, lstm.hidden_size * 8], 0, dtype=np.float32)

    # Add extra dimensions for W, R and B for easier repacking
    B = B.reshape([
        1,  # 0: num of directions, limitation: should be 1
        2,  # 1: two input parts of the matrix: W, R
        4,  # 2: four output parts of the matrix for all gates in order: i, o, f, c
        lstm.hidden_size,  # 3: output size per direction and gate
        1,  # 4: fake dimension to match the input dimension in W and R for shorter code
    ])

    W, R = [x.reshape([
        1,  # 0: num of directions, limitation: should be 1
        1,  # 1: dummy dimension to be aligned with B
        4,  # 2: four output parts of the matrix for all gates in order: i, o, f, c
        lstm.hidden_size,  # 3: output size per direction and gate
        -1])  # 4: input size/hidden size in W/R correspondingly
        for x in (W, R)]

    input_size = match['input'].shape[2]
    assert input_size == W.shape[-1]

    WR = np.concatenate([W, R], axis=4)

    # Reorder gates: iofc --> fico
    gate_reorder = [2, 0, 3, 1]
    WR = np.take(WR, gate_reorder, axis=2)
    B = np.take(B, gate_reorder, axis=2)

    # Sum the components of B that correspond to W and R
    B = np.add.reduce(B, axis=1, keepdims=True)

    # Reorder dimensions by collecting output dimensions first, then the input dimension.
    # Interpret the numbers below by looking at the W, R and B reshapes above in the code.
    inout_reorder = [0, 2, 3, 1, 4]
    WR = WR.transpose(inout_reorder)
    B = B.transpose(inout_reorder)

    # Supposing it is a unidirectional LSTM, squeeze the 'direction' dimension
    assert WR.shape[0] == 1
    assert B.shape[0] == 1
    WR = WR.squeeze(axis=0)
    B = B.squeeze(axis=0)

    # Flatten all output (0, 1) and input dimensions (2, 3)
    final_shape = [WR.shape[0] * WR.shape[1], -1]
    WR = WR.reshape(final_shape)
    B = B.reshape(final_shape)

    # Squeeze fake dimension in B
    B = B.squeeze(axis=-1)

    assert WR.ndim == 2
    assert B.ndim == 1
    assert WR.shape[0] == lstm.hidden_size * 4
    assert B.shape[0] == lstm.hidden_size * 4
    assert WR.shape[1] == lstm.hidden_size + input_size

    for blob, port, name in [(WR, 1, 'weights'), (B, 2, 'biases')]:
        Op.create_and_connect_input_data_node(
            graph,
            lstm,
            {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
            {'in': port, 'bin': name, 'permutation': None}
        )
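# Illustrative sketch (hypothetical sizes): concatenating W and R along the input axis,
# collecting output dimensions first and flattening yields the [4 * hidden_size,
# hidden_size + input_size] layout asserted at the end of the function above.
def _demo_final_ie_layout():
    import numpy as np
    hidden_size, input_size = 2, 3
    W = np.zeros((1, 1, 4, hidden_size, input_size), dtype=np.float32)
    R = np.zeros((1, 1, 4, hidden_size, hidden_size), dtype=np.float32)
    WR = np.concatenate([W, R], axis=4)   # join input and recurrent parts along the input axis
    WR = WR.transpose([0, 2, 3, 1, 4])    # outputs first, then the (dummy) input dimension
    WR = WR.squeeze(axis=0).reshape([4 * hidden_size, -1])
    assert WR.shape == (4 * hidden_size, hidden_size + input_size)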
def repack_weights(graph: Graph, match: dict):
    input = match['input']
    rnn_layer = match['rnn_layer']
    params = match['params'].value.copy()

    graph.remove_edge(match['params'].id, rnn_layer.id)

    input_size = input.shape[2]

    direction = 2 if rnn_layer.has_num_directions else 1
    bsize = (2 * rnn_layer.hidden_size * direction * 1) * rnn_layer.multiplier

    W = np.array(params[0:len(params) - bsize])
    B = np.array(params[len(params) - bsize:])

    W = W.reshape((direction, -1))
    B = B.reshape((direction, -1))

    W, R = np.array(W[:, 0:rnn_layer.hidden_size * rnn_layer.multiplier * input_size]), \
           np.array(W[:, rnn_layer.hidden_size * rnn_layer.multiplier * input_size:])

    W, R = [x.reshape([
        direction,  # 0: num of directions
        1,  # 1: num_cells
        rnn_layer.multiplier,  # 2: output parts of the matrix for all gates
        rnn_layer.hidden_size,  # 3: output size per direction and gate
        -1])  # 4: input size/hidden size in W/R correspondingly
        for x in (W, R)]

    assert W.shape[-1] == input_size
    assert R.shape[-1] == rnn_layer.hidden_size

    B = B.reshape([
        direction,  # 0: num of directions, limitation: should be 1
        1,  # 1: num_cells
        2,  # 2: num of components of B (for the W and R parts)
        rnn_layer.multiplier,  # 3: output parts of the matrix for all gates (e.g. i, f, c, o for LSTM)
        rnn_layer.hidden_size,  # 4: output size per direction and gate
    ])

    # Reorder gates (e.g. ifco --> fico for LSTM)
    gate_reorder = rnn_layer.gate_order
    W = np.take(W, gate_reorder, axis=2)
    R = np.take(R, gate_reorder, axis=2)
    B = np.take(B, gate_reorder, axis=3)

    for blob, port in [(W, 1), (R, 2), (B, 3)]:
        Op.create_and_connect_input_data_node(
            graph,
            rnn_layer,
            {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
            {'in': port, 'permutation': None}
        )
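# Illustrative sketch (hypothetical sizes, mirroring the slicing in the function above): the
# fused parameter vector ends with the bias part of length bsize; the leading part is reshaped
# per direction and split column-wise into the input (W) and recurrent (R) matrices.
def _demo_fused_params_split():
    import numpy as np
    hidden_size, input_size, direction, multiplier = 2, 3, 1, 4  # unidirectional LSTM-like case
    w_len = direction * multiplier * hidden_size * (input_size + hidden_size)  # fused W and R part
    bsize = (2 * hidden_size * direction * 1) * multiplier                     # fused bias part
    params = np.arange(w_len + bsize, dtype=np.float32)
    W_part = params[0:len(params) - bsize]
    B_part = params[len(params) - bsize:]
    assert W_part.size == w_len and B_part.size == bsize
    W_dir = W_part.reshape((direction, -1))
    W = W_dir[:, 0:hidden_size * multiplier * input_size]
    R = W_dir[:, hidden_size * multiplier * input_size:]
    assert R.shape[1] == hidden_size * multiplier * hidden_size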
def repack_weights(self, graph: nx.MultiDiGraph, input: Node, lstm: Node, params: np.array):
    input_size = input.shape[2]

    direction = 2 if lstm.has_num_directions else 1
    bsize = (2 * lstm.hidden_size * direction * 1) * 4

    assert direction == 1

    W = np.array(params[0:len(params) - bsize])
    B = np.array(params[len(params) - bsize:])

    WX = np.array(W[0:lstm.hidden_size * 4 * input_size])
    WH = np.array(W[lstm.hidden_size * 4 * input_size:])

    WX = WX.reshape([lstm.hidden_size * 4, input_size])
    WH = WH.reshape([lstm.hidden_size * 4, lstm.hidden_size])

    WX = WX.transpose([1, 0])
    WH = WH.transpose([1, 0])

    WX = WX.reshape([
        1,  # 0: num of directions, limitation: should be 1
        -1,  # 1: input size
        4,  # 2: four output parts of the matrix for all gates in order: i, f, c, o
        lstm.hidden_size,  # 3: output size per direction and gate
    ])
    WH = WH.reshape([
        1,  # 0: num of directions, limitation: should be 1
        -1,  # 1: hidden state size
        4,  # 2: four output parts of the matrix for all gates in order: i, f, c, o
        lstm.hidden_size,  # 3: output size per direction and gate
    ])
    B = B.reshape([
        1,  # 0: num of directions, limitation: should be 1
        2,  # 1: num of components of B (for the input and hidden parts)
        4,  # 2: four output parts of the matrix for all gates in order: i, f, c, o
        lstm.hidden_size,  # 3: output size per direction and gate
    ])

    assert WX.shape[1] == input_size
    assert WH.shape[1] == lstm.hidden_size

    W = np.concatenate([WX, WH], axis=1)

    # Reorder gates: ifco --> fico
    gate_reorder = [1, 0, 2, 3]
    W = np.take(W, gate_reorder, axis=2)
    B = np.take(B, gate_reorder, axis=2)

    # Collect output dimensions first, then the input dimension
    inout_reorder = [0, 2, 3, 1]
    W = W.transpose(inout_reorder)
    B = B.transpose(inout_reorder)

    final_shape = [W.shape[0] * W.shape[1] * lstm.hidden_size, -1]
    W = W.reshape(final_shape)
    B = B.reshape(final_shape)

    # Sum the components of B that correspond to the input and hidden parts
    B = np.add.reduce(B, axis=1, keepdims=True)
    B = B.squeeze(axis=1)

    assert W.ndim == 2
    assert B.ndim == 1
    assert W.shape[0] == lstm.hidden_size * 4
    assert B.shape[0] == lstm.hidden_size * 4
    assert W.shape[1] == lstm.hidden_size + input_size

    for blob, port, name in [(W, 1, 'weights'), (B, 2, 'biases')]:
        Op.create_and_connect_input_data_node(
            graph,
            lstm,
            {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
            {'in': port, 'bin': name, 'permutation': None}
        )