def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<Params>') ifo_x_weights, ifo_x_weights_shape = read_binary_matrix(pb) try: use_dropout = collect_until_token_and_read(pb, b'<UseDropout>', np.bool) except Error: # layer have not UseDropout attribute, so setup it to False use_dropout = False mapping_rule = {'use_dropout': use_dropout} assert len( ifo_x_weights_shape ) == 2, "Unexpected shape of weights in LSTMNonLinearityComponent" assert ifo_x_weights_shape[ 0] == 3, "Unexpected shape of weights in LSTMNonLinearityComponent" ifo_x_weights = ifo_x_weights.reshape(ifo_x_weights_shape) embed_input(mapping_rule, 1, 'i_weights', ifo_x_weights[0][:]) embed_input(mapping_rule, 2, 'f_weights', ifo_x_weights[1][:]) embed_input(mapping_rule, 3, 'o_weights', ifo_x_weights[2][:]) LstmNonLinearity.update_node_stat(node, mapping_rule) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<PoolSize>') kernel = read_binary_integer32_token(pb) tag = find_next_tag(pb) if tag == '<PoolStep>': read_placeholder(pb, 1) stride = read_binary_integer32_token(pb) pool_step = stride pool_stride = read_token_value(pb, b'<PoolStride>') elif tag == '<PoolStride>': stride = 1 pool_step = None read_placeholder(pb, 1) pool_stride = read_binary_integer32_token(pb) else: raise Error('Can not extract parameters for {}'.format(node)) mapping_rule = { 'window': np.array([1, 1, 1, kernel], dtype=np.int64), 'stride': np.array([1, 1, 1, stride], dtype=np.int64), 'pool_stride': pool_stride, 'pool_step': pool_step, 'pad': np.array([[0, 0], [0, 0], [0, 0], [0, 0]], dtype=np.int64), 'pad_spatial_shape': np.array([[0, 0], [0, 0]], dtype=np.int64), 'pool_method': 'max', } mapping_rule.update(layout_attrs()) Pooling.update_node_stat(node, mapping_rule) return cls.enabled
def load_priors(file_descr, graph): try: collect_until_token(file_descr, b'<Priors>') except Error: # just ignore if priors were not found return if graph.graph['cmd_params'].counts is not None: graph.graph['priors'] = read_binary_vector(file_descr) else: log.error( "Model contains Prior values, if you want to embed them into the generated IR add option --counts=\"\" to command line", extra={'is_warning': True})
def extract(cls, node): pb = node.parameters size = collect_until_token_and_read(pb, b'<OutputDim>') collect_until_token(pb, b'<DropoutProportion>') dropout_proportion = read_binary_float_token(pb) DropoutMask.update_node_stat(node, { 'dropout_proportion': 1.0 - dropout_proportion, 'size': size }) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<Dim>') dim = read_binary_integer32_token(pb) collect_until_token(pb, b'<Scale>') scale = read_binary_float_token(pb) # TODO add real batch here attrs = {} embed_input(attrs, 1, 'weights', np.full([dim], scale)) ScaleShiftOp.update_node_stat(node, attrs) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<Params>') weights = read_binary_vector(pb) find_next_tag(pb) read_placeholder(pb, 1) mapping_rule = { 'layout': 'NCHW' } embed_input(mapping_rule, 1, 'weights', weights) ScaleShiftOp.update_node_stat(node, mapping_rule) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<Params>') weights, weights_shape = read_binary_matrix(pb) mapping_rule = { 'out-size': weights_shape[0], 'transpose_weights': True, } embed_input(mapping_rule, 1, 'weights', weights) FullyConnected.update_node_stat(node, mapping_rule) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<ConvolutionModel>') in_shape = read_token_value(pb, b'<NumFiltersIn>') out_shape = read_token_value(pb, b'<NumFiltersOut>') height_in = read_token_value(pb, b'<HeightIn>') height_out = read_token_value(pb, b'<HeightOut>') height_subsample = read_token_value(pb, b'<HeightSubsampleOut>') collect_until_token(pb, b'<Offsets>') offsets = read_binary_vector_of_pairs(pb, read_token=False, dtype=np.int32) collect_until_token(pb, b'<RequiredTimeOffsets>') time_offsets = read_binary_vector(pb, read_token=False, dtype=np.int32) collect_until_token(pb, b'<LinearParams>') weights, _ = read_binary_matrix(pb) collect_until_token(pb, b'<BiasParams>') biases = read_binary_vector(pb) offsets = offsets.reshape([len(offsets) // 2, 2]) mapping_rule = { # stride for h axis 'height_subsample': height_subsample, # input dimension for h axis 'height_in': height_in, # output dimension for h axis 'height_out': height_out, # input dimension for channel axis 'in_channels': in_shape, # output dimension for channel axis 'out_channels': out_shape, # array with pairs like the following # [ (-1, -1) (-1, 0) (-1, 1) # (0, -1) (0, 0) (0, 1) # (1, -1) (1, 0) (1, 1)] # it means that kernel 3x3 will be applied to calculate current value of output 'offsets': offsets, # required time offsets to calculate current convolution # time_offsets = [-1, 0, 1] for previous example means no padding for time axis and # 3 values should be prepared # time_offsets = [0] means zero padding [1, 1] for time axis 'time_offsets': time_offsets, 'out-size': out_shape * height_out } embed_input(mapping_rule, 1, 'weights', weights) embed_input(mapping_rule, 2, 'biases', biases) TimeHeightConvolutionComponent.update_node_stat(node, mapping_rule) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<Bias>') biases = read_binary_vector(pb) find_next_tag(pb) read_placeholder(pb, 1) mapping_rule = { 'layout': 'NCHW', 'bias_term': True, 'out-size': biases.shape[0], } embed_input(mapping_rule, 2, 'biases', biases) ScaleShiftOp.update_node_stat(node, mapping_rule) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<LinearParams>') weights, weights_shape = read_binary_matrix(pb) tag = find_next_tag(pb) read_placeholder(pb, 1) if tag != '<BiasParams>': raise Error('FixedAffineComponent must contain BiasParams') biases = read_binary_vector(pb) mapping_rule = { 'out-size': weights_shape[0], 'transpose_weights': True, } embed_input(mapping_rule, 1, 'weights', weights) embed_input(mapping_rule, 2, 'biases', biases) FullyConnected.update_node_stat(node, mapping_rule) return cls.enabled
def extract(cls, node): pb = node.parameters try: collect_until_token(pb, b'<InputDim>') except Error: raise Error("<InputDim> was not found") in_dim = read_binary_integer32_token(pb) try: collect_until_token(pb, b'<OutputDim>') except Error: raise Error("<OutputDim> was not found") out_dim = read_binary_integer32_token(pb) assert in_dim % out_dim == 0 group = in_dim / out_dim try: collect_until_token(pb, b'<P>') except Error: raise Error("<P> was not found") p = read_binary_float_token(pb) attrs = { 'group': group, 'p': p, } PNormOp.update_node_stat(node, attrs) return cls.enabled
def extract(cls, node): clip_value = 50 pb = node.parameters res = collect_until_whitespace(pb) if res == b'<CellClip>': clip_value = get_uint32(pb.read(4)) collect_until_token(pb, b'FM') gifo_x_weights, gifo_x_weights_shape = read_binary_matrix(pb, False) gifo_r_weights, gifo_r_weights_shape = read_binary_matrix(pb) gifo_biases = read_binary_vector(pb) input_gate_weights = read_binary_vector(pb) forget_gate_weights = read_binary_vector(pb) output_gate_weights = read_binary_vector(pb) projection_weights, projection_weights_shape = read_binary_matrix(pb) mapping_rule = { 'gifo_x_weights_shape': gifo_x_weights_shape, 'gifo_r_weights_shape': gifo_r_weights_shape, 'projection_weights_shape': projection_weights_shape, 'clip_value': clip_value, 'format': 'kaldi', } embed_input(mapping_rule, 1, 'gifo_x_weights', gifo_x_weights) embed_input(mapping_rule, 2, 'gifo_r_weights', gifo_r_weights) embed_input(mapping_rule, 3, 'gifo_biases', gifo_biases) embed_input(mapping_rule, 4, 'input_gate_weights', input_gate_weights) embed_input(mapping_rule, 5, 'forget_gate_weights', forget_gate_weights) embed_input(mapping_rule, 6, 'output_gate_weights', output_gate_weights) embed_input(mapping_rule, 7, 'projection_weights', projection_weights) LSTMCell.update_node_stat(node, mapping_rule) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<Dim>') dim = read_binary_integer32_token(pb) collect_until_token(pb, b'<BlockDim>') block_dim = read_binary_integer32_token(pb) collect_until_token(pb, b'<TimePeriod>') time_period = read_binary_integer32_token(pb) collect_until_token(pb, b'<DropoutProportion>') dropout_proporion = read_binary_float_token(pb) # collect_until_token(pb, b'<Continuous>') Identity.update_node_stat(node, {}) return cls.enabled
def extract(cls, node): pb = node.parameters try: collect_until_token(pb, b'<Dim>') except Error: try: pb.seek(0) collect_until_token(pb, b'<InputDim>') except Error: raise Error("Neither <Dim> nor <InputDim> were found") in_dim = read_binary_integer32_token(pb) try: collect_until_token(pb, b'<TargetRms>') target_rms = read_binary_float_token(pb) except Error: # model does not contain TargetRms target_rms = 1.0 try: collect_until_token(pb, b'<AddLogStddev>') add_log = read_binary_bool_token(pb) except Error: # model does not contain AddLogStddev add_log = False if add_log is not False: raise Error("AddLogStddev True in Normalize component is not supported") scale = target_rms * np.sqrt(in_dim) attrs = { 'eps': 0.00000001, 'across_spatial': 0, 'channel_shared': 1, 'in_dim': in_dim, } embed_input(attrs, 1, 'weights', [scale]) NormalizeOp.update_node_stat(node, attrs) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<MaxChange>') max_change = read_binary_float_token(pb) collect_until_token(pb, b'<L2Regularize>') collect_until_token(pb, b'<LearningRate>') collect_until_token(pb, b'<TimeOffsets>') time_offsets = read_binary_vector(pb, False, np.int32) collect_until_token(pb, b'<LinearParams>') weights, weights_shape = read_binary_matrix(pb) collect_until_token(pb, b'<BiasParams>') bias_params = read_binary_vector(pb) collect_until_token(pb, b'<OrthonormalConstraint>') orthonormal_constraint = read_binary_float_token(pb) # used only on training collect_until_token(pb, b'<UseNaturalGradient>') use_natural_grad = read_binary_bool_token(pb) # used only on training collect_until_token(pb, b'<NumSamplesHistory>') num_samples_hist = read_binary_float_token(pb) collect_until_token(pb, b'<AlphaInOut>') alpha_in_out = read_binary_float_token(pb), read_binary_float_token(pb) # for training, usually (4, 4) # according to Kaldi documentation http://kaldi-asr.org/doc/classkaldi_1_1nnet3_1_1TdnnComponent.html#details # it looks like it's used only during training (but not 100% sure) collect_until_token(pb, b'<RankInOut>') rank_in_out = read_binary_integer32_token(pb), read_binary_integer32_token(pb) biases = mo_array(bias_params) if len(bias_params) != 0 else None attrs = { 'weights': np.reshape(weights, weights_shape), 'biases': biases, 'time_offsets': time_offsets, } TdnnComponent.update_node_stat(node, attrs) return cls.enabled
def extract(cls, node): pb = node.parameters collect_until_token(pb, b'<Dim>') dim = read_binary_integer32_token(pb) collect_until_token(pb, b'<BlockDim>') block_dim = read_binary_integer32_token(pb) collect_until_token(pb, b'<Epsilon>') eps = read_binary_float_token(pb) collect_until_token(pb, b'<TargetRms>') target_rms = read_binary_float_token(pb) collect_until_token(pb, b'<StatsMean>') mean = read_binary_vector(pb) collect_until_token(pb, b'<StatsVar>') var = read_binary_vector(pb) scale = target_rms / np.sqrt(var + eps) shift = -target_rms * mean / np.sqrt(var + eps) scale = np.tile(scale, dim // block_dim) shift = np.tile(shift, dim // block_dim) attrs = {'out-size': dim} embed_input(attrs, 1, 'weights', scale) embed_input(attrs, 2, 'biases', shift) ScaleShiftOp.update_node_stat(node, attrs) return cls.enabled
def load_parallel_component(file_descr, graph: Graph, prev_layer_id): """ Load ParallelComponent of the Kaldi model. ParallelComponent contains parallel nested networks. VariadicSplit is inserted before nested networks. Outputs of nested networks concatenate with layer Concat. :param file_descr: descriptor of the model file :param graph: graph with the topology. :param prev_layer_id: id of the input layers for parallel component layer :return: id of the concat layer - last layer of the parallel component layers """ nnet_count = read_token_value(file_descr, b'<NestedNnetCount>') log.debug( 'Model contains parallel component with {} nested networks'.format( nnet_count)) split_points = [] outputs = [] inputs = [] for i in range(nnet_count): read_token_value(file_descr, b'<NestedNnet>') collect_until_token(file_descr, b'<Nnet>') g = Graph() load_kalid_nnet1_model(g, file_descr, 'Nested_net_{}'.format(i)) # input to nnet1 models is of a rank 1 but we also insert batch_size to 0th axis # 1st axis contains input_size of the nested subnetwork # we split input from the main network to subnetworks input_node = Node(g, 'Parameter') split_points.append(input_node['shape'][1]) g.remove_node(input_node.id) mapping = { node: graph.unique_id(node) for node in g.nodes(data=False) if node in graph } g = nx.relabel_nodes(g, mapping) for val in mapping.values(): g.node[val]['name'] = val graph.add_nodes_from(g.nodes(data=True)) graph.add_edges_from(g.edges(data=True)) sorted_nodes = tuple(nx.topological_sort(g)) outputs.append(Node(graph, sorted_nodes[-1])) inputs.append(Node(graph, sorted_nodes[0])) split_id = graph.unique_id(prefix='NestedNets/VariadicSplit') attrs = { 'out_ports_count': nnet_count, 'size_splits': split_points, 'axis': 1, 'name': split_id } variadic_split_node = AttributedVariadicSplit(graph, attrs).create_node() prev_layer_node = Node(graph, prev_layer_id) prev_layer_node.add_output_port(0) graph.create_edge( prev_layer_node, variadic_split_node, 0, 0, create_edge_attrs(prev_layer_id, variadic_split_node.id, prev_layer_id)) concat_id = graph.unique_id(prefix='Concat') graph.add_node(concat_id, parameters=None, op='concat', kind='op') concat_node = Node(graph, concat_id) # Connect each output of variadic_split_node to each subnetwork's inputs in ParallelComponent # and each subnetwork's output to concat_node for i, (input_node, output_node) in enumerate(zip(inputs, outputs)): output_node.add_output_port(0) concat_node.add_input_port(i) graph.create_edge( output_node, concat_node, 0, i, create_edge_attrs(output_node.id, concat_id, output_node.id, i, 0)) graph.create_edge( variadic_split_node, input_node, i, 0, create_edge_attrs(variadic_split_node.id, input_node.id, variadic_split_node.id, 0, i)) return concat_id
def load_components(file_descr, graph, component_layer_map=None): num_components = collect_until_token_and_read(file_descr, b'<NumComponents>') log.debug('Network contains {} components'.format(num_components)) is_nnet3 = False if component_layer_map is None else True if not is_nnet3: collect_until_token(file_descr, b'<Components>') all_components = list() name = "" for _ in range(num_components): if is_nnet3: name = collect_until_token_and_read(file_descr, b'<ComponentName>', np.string_) component_type = find_next_component(file_descr) if component_type == end_of_nnet_tag.lower()[1:-1]: break start_index = file_descr.tell() end_tag, end_index = find_end_of_component(file_descr, component_type) # read dim info where possible to simplify shape calculation for MemoryOffset # shape calculation for MemoryOffset can't be done through shape of previous layer because # it is separated in 2 parts to remove cycle from graph file_descr.seek(start_index) dim = 0 dim_words = {b'<Dim>', b'<InputDim>'} for dim_word in dim_words: try: collect_until_token(file_descr, dim_word, size_search_zone=end_index - start_index) cur_index = file_descr.tell() if start_index < cur_index < end_index: dim = read_binary_integer32_token(file_descr) break else: file_descr.seek(start_index) except Error: file_descr.seek(start_index) if is_nnet3: if name in component_layer_map: layer_id = component_layer_map[name][0] for layer in component_layer_map[name]: node = Node(graph, layer) node['parameters'] = get_parameters( file_descr, start_index, end_index) node['op'] = component_type # Read dim info where possible to simplify shape calculation for MemoryOffset for o_n_name, params in node.get_outputs(): o_n = Node(graph, o_n_name) if o_n['op'] == 'MemoryOffset' and dim != 0: o_n['parameters']['element_size'] = int64_array( [1, dim]) else: raise Error("Something wrong with layer {}".format(name)) else: layer_id = graph.unique_id(prefix=component_type) graph.add_node(layer_id, parameters=get_parameters(file_descr, start_index, end_index), op=component_type, kind='op') if hasattr(graph, 'op_names_statistic'): graph.op_names_statistic[component_type] += 1 all_components.append(layer_id) log.debug('{} (type is {}) was loaded'.format(layer_id, component_type)) return all_components