def extract(node):
    # update the attributes of the node
    block_size = node.pb.attr['block_size'].i
    data_format = node.pb.attr['data_format'].s.decode('utf-8')
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, {'block_size': block_size,
                                                                  'data_format': data_format})
    return __class__.enabled
def extract(node):
    param = node.pb.python_param
    attrs = CaffePythonFrontExtractorOp.parse_param_str(param.param_str)
    update_attrs = {
        'feat_stride': 16,
        'base_size': 16,
        'min_size': 16,
        'ratio': [0.5, 1, 2],
        'scale': [8, 16, 32],
        'pre_nms_topn': 6000,
        'post_nms_topn': 300,
        'nms_thresh': 0.7
    }
    if 'ratios' in attrs and 'ratio' in attrs:
        log.error('Both ratios and ratio found, value of ratios will be used',
                  extra={'is_warning': True})
    if 'scales' in attrs and 'scale' in attrs:
        log.error('Both scales and scale found, value of scales will be used',
                  extra={'is_warning': True})
    if 'ratios' in attrs:
        attrs['ratio'] = attrs['ratios']
        del attrs['ratios']
    if 'scales' in attrs:
        attrs['scale'] = attrs['scales']
        del attrs['scales']
    update_attrs.update(attrs)
    CaffePythonFrontExtractorOp.check_param(Op.get_op_class_by_name('Proposal'), update_attrs)
    Op.get_op_class_by_name('Proposal').update_node_stat(node, update_attrs)
    return __class__.enabled
def extract(node):
    proto_layer = node.pb
    param = proto_layer.region_yolo_param
    flatten_param = proto_layer.flatten_param
    axis = flatten_param.axis
    end_axis = flatten_param.end_axis
    coords = param.coords
    classes = param.classes
    num = param.num
    update_attrs = {
        'coords': coords,
        'classes': classes,
        'num': num,
        'do_softmax': int(param.do_softmax),
        'anchors': np.array(param.anchors),
        'mask': np.array(param.mask)
    }
    flatten_attrs = {
        'axis': axis,
        'end_axis': end_axis
    }
    mapping_rule = merge_attrs(param, update_attrs)
    mapping_rule.update(flatten_attrs)
    mapping_rule.update(layout_attrs())
    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, mapping_rule)
    return __class__.enabled
def extract(node):
    proto_layer = node.pb
    param = proto_layer.prior_box_param
    variance = param.variance
    if len(variance) == 0:
        variance = [0.1]
    update_attrs = {
        'aspect_ratio': np.array(param.aspect_ratio),
        'min_size': np.array(param.min_size),
        'max_size': np.array(param.max_size),
        'flip': int(param.flip),
        'clip': int(param.clip),
        'variance': list(variance),
        'img_size': param.img_size,
        'img_h': param.img_h,
        'img_w': param.img_w,
        'step': param.step,
        'step_h': param.step_h,
        'step_w': param.step_w,
        'offset': param.offset,
    }
    mapping_rule = merge_attrs(param, update_attrs)
    mapping_rule.update(layout_attrs())
    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, mapping_rule)
    return __class__.enabled
def extract(node):
    proto_layer = node.pb
    param = proto_layer.augmentation_param
    # slice_dim is a deprecated parameter used as an alias for axis;
    # however, if slice_dim is defined and axis is default, slice_dim is used
    update_attrs = {
        'crop_width': param.crop_width,
        'crop_height': param.crop_height,
        'write_augmented': param.write_augmented,
        'max_multiplier': param.max_multiplier,
        'augment_during_test': int(param.augment_during_test),
        'recompute_mean': param.recompute_mean,
        'write_mean': param.write_mean,
        'mean_per_pixel': int(param.mean_per_pixel),
        'mean': param.mean,
        'mode': param.mode,
        'bottomwidth': param.bottomwidth,
        'bottomheight': param.bottomheight,
        'num': param.num,
        'chromatic_eigvec': param.chromatic_eigvec
    }
    mapping_rule = merge_attrs(param, update_attrs)
    if node.model_pb:
        for index in range(0, len(node.model_pb.blobs)):
            embed_input(mapping_rule, index + 1, 'custom_{}'.format(index),
                        node.model_pb.blobs[index].data)
    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, mapping_rule)
    return __class__.enabled
def check_init_states(graph: Graph, match: dict):
    """
    Check if the cell has initial states and create zero states if not.
    """
    rnn_layer = match['rnn_layer']
    num_directions = 2 if rnn_layer.direction == 'bidirectional' else 1
    batch_size = rnn_layer.in_node(0).shape[rnn_layer.batch_dim]

    h_init_port = 5
    c_init_port = 6

    if h_init_port not in rnn_layer.in_nodes():
        h_shape = [num_directions, batch_size, rnn_layer.hidden_size]  # from ONNX spec
        h_init = np.full(h_shape, 0, dtype=np.float32)
        Op.create_and_connect_input_data_node(
            graph,
            rnn_layer,
            {'value': h_init, 'shape': np.array(h_init.shape, dtype=np.int64)},
            {'in': h_init_port, 'permutation': None}
        )

    if rnn_layer.op == 'LSTM':
        if c_init_port not in rnn_layer.in_nodes():
            c_shape = [num_directions, batch_size, rnn_layer.hidden_size]  # from ONNX spec
            c_init = np.full(c_shape, 0, dtype=np.float32)
            Op.create_and_connect_input_data_node(
                graph,
                rnn_layer,
                {'value': c_init, 'shape': np.array(c_init.shape, dtype=np.int64)},
                {'in': c_init_port, 'permutation': None}
            )
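# A brief illustration of the zero-state shapes created above (sizes are
# hypothetical, chosen only for the example): for a bidirectional LSTM with
# batch_size = 8 and hidden_size = 128, num_directions == 2, so both h_init
# and c_init are zero tensors of shape [2, 8, 128].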
def apply_scale(graph: Graph, input_node: Node, node_mean_scale_values: dict):
    if 'scale' in node_mean_scale_values and node_mean_scale_values['scale'] is not None:
        if all([x == 1 for x in node_mean_scale_values['scale']]):
            return
        out_node = input_node.out_node()
        if not input_node.has_valid('shape'):
            raise Error("Node {} has no valid shape attribute".format(input_node.id))
        input_shape = input_node.shape

        # Create Mul node
        value = 1 / np.array(node_mean_scale_values['scale'])
        graph.remove_edge(input_node.id, out_node.id)

        mul_node = Mul(graph, dict(name="Mul_"))
        mul_data = Op.create_input_data_node(graph, "data_mul_", np.array(value))
        Op.expand_node_shape(mul_data, (len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 0))
        mul_input = Op.create_data_node(graph, input_node, {'shape': out_node.shape})

        mul_node.create_node_with_data(inputs=[mul_input, mul_data], data_nodes=out_node)
def extract_proposal_params(node, defaults):
    param = node.pb.python_param
    attrs = CaffePythonFrontExtractorOp.parse_param_str(param.param_str)
    update_attrs = defaults
    if 'ratios' in attrs and 'ratio' in attrs:
        log.error('Both ratios and ratio found, value of ratios will be used',
                  extra={'is_warning': True})
    if 'scales' in attrs and 'scale' in attrs:
        log.error('Both scales and scale found, value of scales will be used',
                  extra={'is_warning': True})
    if 'ratios' in attrs:
        attrs['ratio'] = attrs['ratios']
        del attrs['ratios']
    if 'scales' in attrs:
        attrs['scale'] = attrs['scales']
        del attrs['scales']
    update_attrs.update(attrs)
    CaffePythonFrontExtractorOp.check_param(Op.get_op_class_by_name('Proposal'), update_attrs)
    Op.get_op_class_by_name('Proposal').update_node_stat(node, update_attrs)
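# A minimal usage sketch for extract_proposal_params (hypothetical: the defaults
# below only mirror the values hard-coded in the Proposal extractor earlier in
# this section, and `node` is assumed to be a parsed Caffe python layer node):
proposal_defaults = {
    'feat_stride': 16, 'base_size': 16, 'min_size': 16,
    'ratio': [0.5, 1, 2], 'scale': [8, 16, 32],
    'pre_nms_topn': 6000, 'post_nms_topn': 300, 'nms_thresh': 0.7,
}
# extract_proposal_params(node, proposal_defaults)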
def _scale_input_action_mul(graph: nx.MultiDiGraph, match: dict, scale: float):
    assert (len(match['placeholder'].out_nodes()))

    tinput = match['placeholder']
    if not tinput.has_valid('shape'):
        raise Error("Node {} has no valid shape attribute".format(tinput.id))

    input_shape = tinput.shape
    toutput = match['data']

    # Compute the scale value for the Mul node
    value = np.array([1 / scale])

    # Disconnect input with data node
    graph.remove_edge(tinput.id, toutput.id)

    # Create Mul node
    mul_node = Mul(graph, dict(name="Mul1_"))
    mul_data = Op.create_input_data_node(graph, "data_mul_scale_", np.array(value))
    Op.expand_node_shape(mul_data, len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 0)
    mul_input = Op.create_data_node(graph, tinput, {'shape': toutput.shape})

    mul_node.create_node_with_data(inputs=[mul_input, mul_data], data_nodes=toutput)
def extract(node):
    proto_layer = node.pb
    param = proto_layer.resample_param
    types = [
        "",
        'caffe.ResampleParameter.NEAREST',
        'caffe.ResampleParameter.LINEAR',
        'caffe.ResampleParameter.CUBIC',
        'caffe.ResampleParameter.AREA'
    ]
    resample_type = types[param.type]
    update_attrs = {
        'antialias': int(param.antialias),
        'height': param.height,
        'width': param.width,
        'type': resample_type,
        'factor': param.factor
    }
    mapping_rule = merge_attrs(param, update_attrs)
    mapping_rule['resample_type'] = mapping_rule['type']
    mapping_rule.pop('type')
    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, mapping_rule)
    return __class__.enabled
def apply_mean_value(graph: Graph, input_node: Node, node_mean_scale_values: dict):
    if 'mean' in node_mean_scale_values and node_mean_scale_values['mean'] is not None:
        if all([x == 0 for x in node_mean_scale_values['mean']]):
            return
        out_node = input_node.out_node()
        if not input_node.has_valid('shape'):
            raise Error("Node {} has no valid shape attribute".format(input_node.id))
        input_shape = input_node.shape

        # Create Add node
        graph.remove_edge(input_node.id, out_node.id)
        value = np.array(node_mean_scale_values['mean']) * (-1)

        add_node = Add(graph, dict(name="Add_"))
        add_data = Op.create_input_data_node(graph, "data_add_", np.array(value))
        Op.expand_node_shape(add_data, (len(input_shape) - 2 if graph.graph['layout'] == 'NCHW' else 0))
        add_input = Op.create_data_node(graph, input_node, {'shape': out_node.shape})

        add_node.create_node_with_data(inputs=[add_input, add_data], data_nodes=out_node)
def split_bilstm(self, bilstm, new_init_hiddens, new_init_cells, splitted_W, splitted_R, splitted_B):
    """ Split one bilstm node into 2 one-directional lstm nodes.

        All input data nodes should already be prepared: they should have
        2 in the major dimension.
    """
    assert len(bilstm.out_nodes()) == 3
    all_outputs = []
    for i in [0, 1]:
        direction = ['forward', 'reverse'][i]
        op = LSTMSequence(bilstm.graph, {
            'hidden_size': bilstm.hidden_size,
            'direction': direction,
            'batch_dim': bilstm.batch_dim,
            'sequence_dim': bilstm.sequence_dim,
            'blobs_wrb': bilstm.blobs_wrb,
            'has_num_directions': bilstm.has_num_directions,
            'format': bilstm.format,
            'name': bilstm.name + '/Split/' + direction,
        })

        output_data = Op._create_data_node(
            bilstm.graph,
            name=bilstm.out_node(0).name + '/Split/' + str(i),
            attrs={'shape': bilstm.out_node(0).shape.copy()})

        assert output_data.shape[1] == 2
        output_data.shape[1] = 1

        output_hidden = Op._create_data_node(
            bilstm.graph,
            name=bilstm.out_node(1).name + '/Split/' + str(i),
            attrs={'shape': bilstm.out_node(1).shape.copy()})

        assert output_hidden.shape[0] == 2
        output_hidden.shape[0] = 1

        output_cell = Op._create_data_node(
            bilstm.graph,
            name=bilstm.out_node(2).name + '/Split/' + str(i),
            attrs={'shape': bilstm.out_node(2).shape.copy()})

        assert output_cell.shape[0] == 2
        output_cell.shape[0] = 1

        all_outputs.append(
            op.create_node_with_data(
                inputs=[
                    bilstm.in_node(0),
                    splitted_W[i],
                    splitted_R[i],
                    splitted_B[i],
                    None,
                    new_init_hiddens[i],
                    new_init_cells[i],
                ],
                data_nodes=[output_data, output_hidden, output_cell]))

    return all_outputs
def extract(node):
    attrs = get_mxnet_layer_attrs(node.symbol_dict)

    pre_nms_topn = attrs.int('rpn_pre_nms_top_n', 6000)
    post_nms_topn = attrs.int('rpn_post_nms_top_n', 300)
    nms_thresh = attrs.float('threshold', 0.7)
    min_size = attrs.int('rpn_min_size', 16)
    scale = attrs.tuple("scales", float, (4, 8, 16, 32))
    ratio = attrs.tuple("ratios", float, (0.5, 1, 2))
    feat_stride = attrs.int('feature_stride', 16)

    update_attrs = {
        'feat_stride': feat_stride,
        'ratio': np.array(ratio),
        'min_size': min_size,
        'scale': np.array(scale),
        'pre_nms_topn': pre_nms_topn,
        'post_nms_topn': post_nms_topn,
        'nms_thresh': nms_thresh,
        'base_size': feat_stride
    }

    # update the attributes of the node
    Op.get_op_class_by_name('Proposal').update_node_stat(node, update_attrs)
    return __class__.enabled
def extract(node):
    proto_layer = node.pb
    pb_model = node.model_pb
    param = proto_layer.prelu_param

    update_attrs = {
        'channel_shared': int(param.channel_shared)
    }

    variance_norm_caffe_map = {
        0: 'caffe.FillerParameter.FAN_IN',
        1: 'caffe.FillerParameter.FAN_OUT',
        2: 'caffe.FillerParameter.AVERAGE'
    }

    if hasattr(param, 'filler'):
        update_attrs.update({
            'filler_type': param.filler.type,
            'filler_value': int(param.filler.value),
            'min': int(param.filler.min),
            'max': int(param.filler.max),
            'mean': int(param.filler.mean),
            'std': int(param.filler.std),
            'sparse': param.filler.sparse,
            'variance_norm': variance_norm_caffe_map[param.filler.variance_norm]
        })

    mapping_rule = merge_attrs(param, update_attrs)
    mapping_rule.update(weights_biases(False, pb_model))
    mapping_rule.update(layout_attrs())

    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, mapping_rule)
    return __class__.enabled
def _fused_batch_norm_decomposition(graph: Graph, tinput: Node, toutput: Node, gamma: Node, beta: Node,
                                    mean: np.ndarray, variance: np.ndarray, can_be_fused=True):
    """
    This is a common function for TF, Caffe and MXNet.
    It creates a Mul->Add->Mul->Add subgraph.
    """
    shape = tinput.shape

    # Create first Mul & Add operations
    mul1_node = Mul(graph, dict(name="Mul1_", can_be_fused=can_be_fused))
    add1_node = Add(graph, dict(name="Add1_", can_be_fused=can_be_fused))

    mul1_data = Op.create_input_data_node(graph, "data_mul_", np.array(mean))
    add1_data = Op.create_input_data_node(graph, "data_add_", np.array(variance))

    # Broadcast const from scalar
    # We can broadcast only when const.value is scalar
    if gamma.shape[0] != gamma.value.shape[0]:
        gamma.value.resize(gamma.shape)
        gamma.value.fill(gamma.value[0])

    # Create second Mul & Add
    mul2_node = Mul(graph, dict(name="Mul2_", can_be_fused=can_be_fused))
    add2_node = Add(graph, dict(name="Add2_", can_be_fused=can_be_fused))

    add2_node.create_node_with_data(
        inputs=[mul2_node.create_node_with_data(
            inputs=[add1_node.create_node_with_data(
                inputs=[mul1_node.create_node_with_data(inputs=[tinput, mul1_data]),
                        add1_data]),
                gamma]),
            beta],
        data_nodes=toutput)
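# Note on the decomposition above (a reading aid, not part of the original code):
# despite the parameter names, the `mean` and `variance` arguments receive the
# already-precomputed per-channel scale and shift when called from
# convert_batch_norm later in this section, so the subgraph built here computes
#     y = ((x * scale + shift) * gamma) + beta.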
def extract(node):
    proto_layer = node.pb
    param = proto_layer.correlation_param

    corr_type = 'caffe.CorrelationParameter.MULTIPLY'
    if param.correlation_type == 1:
        corr_type = 'caffe.CorrelationParameter.SUBTRACT'

    update_attrs = {
        'pad': param.pad,
        'kernel_size': param.kernel_size,
        'max_displacement': param.max_displacement,
        'stride_1': param.stride_1,
        'stride_2': param.stride_2,
        'single_direction': param.single_direction,
        'do_abs': int(param.do_abs),
        'correlation_type': corr_type,
    }

    mapping_rule = merge_attrs(param, update_attrs)
    mapping_rule.update(layout_attrs())

    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, mapping_rule)
    return __class__.enabled
def extract(node):
    proto_layer = node.pb
    param = proto_layer.accum_param
    attrs = collect_attributes(param)

    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, attrs)
    return __class__.enabled
def extract(node):
    mapping_rule = collect_attributes(node.pb.shuffle_channel_param)
    mapping_rule.update(layout_attrs())

    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, mapping_rule)
    return __class__.enabled
def extract(node):
    attrs = {
        'data_type': tf_dtype_extractor(node.pb.attr["dtype"].type),
        'shape': tf_tensor_shape(node.pb.attr["shape"].shape),
        'identity': True,
    }
    Op.update_node_stat(node, attrs)
    return __class__.enabled
def convert_batch_norm(graph: nx.MultiDiGraph):
    """
    This function finds FusedBatchNorm layers (or BatchNorm for MXNet) and replaces them
    with a Mul->Add->Mul->Add sequence.
    """
    for n in list(graph.nodes()):
        node = Node(graph, n)
        if node.has_valid('op') and (node.op == 'FusedBatchNorm' or node.op == 'BatchNorm' or
                                     node.op == 'BatchNormalization'):
            toutput = node.out_node()
            tinput = node.in_node(0)

            if any([node.in_node(i).value is None for i in range(1, len(node.in_nodes()))]):
                log.warning('Cannot translate FusedBatchNorm {} node with non-constant weights'.format(
                    node.name if node.has_valid('name') else '<UNKNOWN>'))
                continue

            const = node.in_node(1)
            beta = node.in_node(2)
            mean = node.in_node(3)
            variance = node.in_node(4)
            eps = node.eps

            if node.has_valid('fix_gamma') and node.fix_gamma:
                const.value.fill(1.)

            can_be_fused = False if not node.soft_get('can_be_fused') else True

            # Remove edges from FusedBN node
            graph.remove_edge(tinput.id, node.id)
            graph.remove_edge(beta.id, node.id)
            graph.remove_edge(const.id, node.id)
            graph.remove_edge(mean.id, node.id)
            graph.remove_edge(variance.id, node.id)
            graph.remove_edge(node.id, toutput.id)

            scale = 1. / np.sqrt(variance.value + eps)
            shift = (mean.value * (-1)) * scale

            # Expand dims for current layout
            broadcast_dims_cnt = len(tinput.shape) - 2 if graph.graph['layout'] == 'NCHW' else 0

            # Update values and shapes with new shape
            Op.expand_node_shape(const, broadcast_dims_cnt)
            Op.expand_node_shape(beta, broadcast_dims_cnt)

            for idx in range(broadcast_dims_cnt):
                scale = np.expand_dims(scale, axis=-1)
                shift = np.expand_dims(shift, axis=-1)

            _fused_batch_norm_decomposition(graph, tinput, toutput, const, beta, scale, shift, can_be_fused)
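# The algebra behind the scale/shift computed above (a sketch for reference):
#     scale = 1 / sqrt(variance + eps)
#     shift = -mean * scale
# so for an input x,
#     x * scale + shift = (x - mean) / sqrt(variance + eps),
# which the decomposition then multiplies by gamma and offsets by beta.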
def repack_weights(self, graph: Graph, match: dict):
    # Concatenate W and R in the IE format
    # Delete useless num_dir and n_cells dimensions in W, R, B (peepholes?)
    lstm = match['rnn_layer']
    W, R, B = match['W'].value.copy(), match['R'].value.copy(), match['B'].value.copy()

    graph.remove_edge(match['W'].id, lstm.id)
    graph.remove_edge(match['R'].id, lstm.id)
    graph.remove_edge(match['B'].id, lstm.id)

    # Sum the components of B that correspond to W and R
    if lstm.op == 'GRU' and lstm.linear_before_reset:
        B_shape = np.array(B.shape)
        B_shape[3] = 4
        B_shape[2] = 1
        B_tmp = np.zeros(shape=B_shape)
        B_tmp[:, :, :, 0, :] = B[:, :, 0, 0, :] + B[:, :, 1, 0, :]
        B_tmp[:, :, :, 1, :] = B[:, :, 0, 1, :] + B[:, :, 1, 1, :]
        B_tmp[:, :, :, 2, :] = B[:, :, 0, 2, :][:, :, np.newaxis, :]
        B_tmp[:, :, :, 3, :] = B[:, :, 1, 2, :][:, :, np.newaxis, :]
        B = B_tmp
    else:
        B = np.add.reduce(B, axis=2, keepdims=True)

    # Concatenate W, R to the IE-compatible format
    assert len(W.shape) == 5
    assert len(R.shape) == 5
    WR = np.concatenate([W, R], axis=4)

    # Squeeze useless dimensions
    assert WR.shape[0] == 1  # num_dir == 1
    assert WR.shape[1] == 1  # num_cells == 1
    assert B.shape[0] == 1
    assert B.shape[1] == 1
    WR = WR.squeeze(axis=(0, 1))
    B = B.squeeze(axis=(0, 1))

    # Flatten all output (0, 1) and input dimensions (2, 3)
    final_shape_WR = [WR.shape[0] * WR.shape[1], -1]
    assert final_shape_WR[0] == lstm.hidden_size * lstm.multiplier
    WR = WR.reshape(final_shape_WR)

    final_shape_B = final_shape_WR
    if lstm.op == 'GRU' and lstm.linear_before_reset:
        final_shape_B[0] = lstm.hidden_size * 4
    B = B.reshape(final_shape_B)

    # Squeeze the fake dimension in B
    B = B.squeeze(axis=-1)

    for blob, port, name in [(WR, 1, 'weights'), (B, 2, 'biases')]:
        Op.create_and_connect_input_data_node(
            graph, lstm,
            {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
            {'in': port, 'bin': name, 'permutation': None}
        )
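# Shape bookkeeping for the repacking above (a reading aid under the function's
# own assertions): after the squeeze, WR has shape
# [multiplier, hidden_size, input_size + hidden_size], and the final reshape
# flattens it to [multiplier * hidden_size, input_size + hidden_size], i.e. one
# combined weights blob that is then attached as the 'weights' input on port 1.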
def extract(node):
    proto_layer = node.pb
    param = proto_layer.norm_param

    attrs = collect_attributes(param, enable_flattening_nested_params=True)
    attrs.update(weights_biases(False, node.model_pb))

    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, attrs)
    return __class__.enabled
def extract(node):
    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, {
        'out_max_val': 0,
        'top_k': 1,
        'axis': None,
        'dim_attrs': ['axis']
    })
    return __class__.enabled
def extract(cls, node):
    attrs = {
        'data_type': tf_dtype_extractor(node.pb.attr["dtype"].type),
        'shape': tf_tensor_shape(node.pb.attr["shape"].shape),
        'identity': True,
        'infer': lambda node: copy_shape_infer(node, value_infer=copy_value),
    }
    Op.update_node_stat(node, attrs)
    return cls.enabled
def extract(node):
    proto_layer = node.pb
    param = proto_layer.attr
    # extract parameters from the TensorFlow layer and prepare them for IR
    attrs = {
        'op': __class__.op
    }

    # update the attributes of the node
    Op.get_op_class_by_name(__class__.op).update_node_stat(node, attrs)
    return __class__.enabled
def extract(cls, node):
    shapes = node.pb.attr['output_shapes'].list.shape
    tf_types = node.pb.attr['output_types'].list.type

    extracted_types = []
    for t in tf_types:
        extracted_types.append(tf_dtype_extractor(t))

    result_shapes = []
    for shape_pb in shapes:
        result_shapes.append(tf_tensor_shape(shape_pb))

    Op.update_node_stat(node, {'shapes': result_shapes, 'types': extracted_types, 'out_ports_count': 1})
    return cls.enabled
def extract(cls, node):
    narrow_range = node.pb.attr['narrow_range'].b
    num_bits = node.pb.attr['num_bits'].i
    levels = 2 ** num_bits - int(narrow_range)

    # we prepare this operation to be converted to FakeQuantize op,
    # but input reconnection is needed, so we don't set infer function and type attribute
    Op.update_node_stat(node, {'op': 'FakeQuantWithMinMaxVars', 'levels': levels,
                               'narrow_range': narrow_range, 'num_bits': num_bits})
    return cls.enabled
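# A minimal sketch of the levels arithmetic above (values chosen only for
# illustration): with num_bits = 8 the op quantizes to 2 ** 8 = 256 levels,
# or 255 when narrow_range is set.
assert 2 ** 8 - int(False) == 256
assert 2 ** 8 - int(True) == 255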
def _create_data_if_necessary(self):
    if self.node.graph.stage == 'front':
        raise Error("_create_data_if_necessary method is not applicable for front Graph phase!")
    if self.type == 'in':
        raise Error("_create_data_if_necessary method is not applicable for 'in' Port type!")

    if self.idx not in self.node.out_nodes(control_flow=self.control_flow):
        from mo.ops.op import Op
        Op.create_data_node(self.node.graph, self.node, out_port=self.idx)
        self.node['need_shape_inference'] = True
    return self.node.out_node(self.idx, control_flow=self.control_flow)
def copy_input_blobs(op: Node, copy_op: Node):
    """
    Copy input blob data nodes from the restored graph to the copied one
    :param op: Node from restored graph
    :param copy_op: Node from copied graph
    :return:
    """
    for u, d in op.get_sorted_inputs():
        if 'bin' in d:
            Op.create_and_connect_input_data_node(copy_op.graph, copy_op,
                                                  {'value': op.in_node(d['in']).value,
                                                   'shape': op.in_node(d['in']).shape}, d)
def repack_weights(graph: Graph, match: dict):
    """
    Repack weights into the general format (described above) and reorder gates.
    """
    rnn_layer = match['rnn_layer']
    W = match['W'].value.copy()
    R = match['R'].value.copy()
    num_directions = 2 if rnn_layer.direction == 'bidirectional' else 1

    graph.remove_edge(match['W'].id, rnn_layer.id)
    graph.remove_edge(match['R'].id, rnn_layer.id)

    # find the optional 'B' biases blob
    if 3 in rnn_layer.in_nodes():
        # TODO: check if 'bin': 'B' attribute is assigned to this edge
        B = rnn_layer.in_node(3).value.copy()
        graph.remove_edge(rnn_layer.in_node(3).id, rnn_layer.id)
    else:
        B_shape = [num_directions, 2 * rnn_layer.multiplier * rnn_layer.hidden_size]  # from ONNX spec
        B = np.full(B_shape, 0, dtype=np.float32)

    # Add extra dimensions for W, R and B for easier repacking and reordering
    B = B.reshape([
        num_directions,  # 0: num of directions
        rnn_layer.num_layers,  # 1: num_layers
        2,  # 2: two input parts of the matrix: W, R
        rnn_layer.multiplier,  # 3: four output parts of the matrix for all gates in order: i, o, f, c
        rnn_layer.hidden_size,  # 4: output size per direction and gate
    ])

    W, R = [x.reshape([
        num_directions,  # 0: num of directions
        rnn_layer.num_layers,  # 1: num_layers
        rnn_layer.multiplier,  # 2: four output parts of the matrix for all gates in order: i, o, f, c
        rnn_layer.hidden_size,  # 3: output size per direction and gate
        -1])  # 4: input size/hidden size in W/R correspondingly
        for x in (W, R)]

    input_size = match['input'].shape[2]
    assert input_size == W.shape[-1]

    # Reorder gates: iofc --> fico
    gate_reorder = rnn_layer.gate_order
    W, R = (np.take(x, gate_reorder, axis=2) for x in (W, R))
    B = np.take(B, gate_reorder, axis=3)

    for blob, port in [(W, 1), (R, 2), (B, 3)]:
        Op.create_and_connect_input_data_node(
            graph, rnn_layer,
            {'value': blob, 'shape': np.array(blob.shape, dtype=np.int64)},
            {'in': port, 'permutation': None}
        )
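# Illustration of the gate reordering above (the order is hypothetical, picked
# only for the example): if rnn_layer.gate_order == [2, 0, 1, 3], then
# np.take(W, [2, 0, 1, 3], axis=2) puts the original gate 2 first, followed by
# gates 0, 1 and 3; the same permutation is applied to R and, on axis 3, to B.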