def __init__(self, encoder_type, input_size, num_units, num_layers_main,
             num_layers_sub, num_classes_main, num_classes_sub,
             main_task_weight, lstm_impl='LSTMBlockCell', use_peephole=True,
             splice=1, parameter_init=0.1, clip_grad_norm=None,
             clip_activation=None, num_proj=None, weight_decay=0.0,
             bottleneck_dim=None, time_major=True):
    """Build a multi-task CTC model (main task + auxiliary sub task).

    Args:
        encoder_type (str): must be 'multitask_blstm' or 'multitask_lstm'
        input_size (int): dimension of the input features
        num_units (int): number of units per encoder layer
        num_layers_main (int): number of encoder layers for the main task
        num_layers_sub (int): number of encoder layers for the sub task
        num_classes_main (int): output classes of the main task (a blank
            label is added by the parent class)
        num_classes_sub (int): output classes of the sub task (a blank
            label is added here)
        main_task_weight (float): loss weight of the main task, in [0, 1];
            the sub task implicitly gets 1 - main_task_weight
        lstm_impl, use_peephole, splice, parameter_init, clip_grad_norm,
        clip_activation, num_proj, weight_decay, bottleneck_dim,
        time_major: forwarded to the parent constructor / encoder.

    Raises:
        ValueError: if main_task_weight is outside [0, 1]
        NotImplementedError: if encoder_type is not a multitask encoder
    """
    super(MultitaskCTC, self).__init__(
        encoder_type, input_size, num_units, num_layers_main,
        num_classes_main, lstm_impl, use_peephole, splice, parameter_init,
        clip_grad_norm, clip_activation, num_proj, weight_decay,
        bottleneck_dim, time_major)

    self.num_classes_sub = num_classes_sub + 1  # + blank label

    if float(main_task_weight) < 0 or float(main_task_weight) > 1:
        raise ValueError('Set main_task_weight between 0 to 1.')
    self.main_task_weight = main_task_weight
    self.sub_task_weight = 1 - self.main_task_weight

    # Placeholder for multi-task
    self.labels_sub_pl_list = []

    self.name = encoder_type + '_ctc'
    # BUG FIX: the original condition was
    # `if ['multitask_blstm', 'multitask_lstm']:` — the truthiness of a
    # non-empty list literal, which is always True, making the
    # NotImplementedError branch unreachable. Test membership instead.
    if encoder_type in ['multitask_blstm', 'multitask_lstm']:
        self.encoder = load(encoder_type)(
            num_units=num_units,
            num_proj=self.num_proj,
            num_layers_main=num_layers_main,
            num_layers_sub=num_layers_sub,
            lstm_impl=lstm_impl,
            use_peephole=use_peephole,
            parameter_init=parameter_init,
            clip_activation=clip_activation,
            time_major=time_major)
    else:
        raise NotImplementedError
def __init__(self, encoder_type, input_size, num_units, num_layers_main,
             num_layers_sub, num_classes_main, num_classes_sub,
             main_task_weight, lstm_impl='LSTMBlockCell', use_peephole=True,
             splice=1, parameter_init=0.1, clip_grad_norm=None,
             clip_activation=None, num_proj=None, weight_decay=0.0,
             bottleneck_dim=None, time_major=True):
    """Build a multi-task CTC model: a main task plus a weighted sub task.

    The parent constructor handles the main-task setup; this method adds
    the sub-task class count (+1 for the CTC blank), the task loss
    weights, and the multitask encoder.

    Raises:
        ValueError: if main_task_weight is outside [0, 1]
        NotImplementedError: if encoder_type is not one of
            'multitask_blstm' / 'multitask_lstm'
    """
    super(MultitaskCTC, self).__init__(
        encoder_type, input_size, num_units, num_layers_main,
        num_classes_main, lstm_impl, use_peephole, splice, parameter_init,
        clip_grad_norm, clip_activation, num_proj, weight_decay,
        bottleneck_dim, time_major)

    self.num_classes_sub = num_classes_sub + 1  # + blank label

    if float(main_task_weight) < 0 or float(main_task_weight) > 1:
        raise ValueError('Set main_task_weight between 0 to 1.')
    self.main_task_weight = main_task_weight
    self.sub_task_weight = 1 - self.main_task_weight

    # Placeholder for multi-task
    self.labels_sub_pl_list = []

    self.name = encoder_type + '_ctc'
    # BUG FIX: the original tested `if ['multitask_blstm',
    # 'multitask_lstm']:` — a non-empty list literal is always truthy, so
    # every encoder_type reached this branch and the else clause was dead
    # code. The intended check is membership of encoder_type.
    if encoder_type in ['multitask_blstm', 'multitask_lstm']:
        self.encoder = load(encoder_type)(
            num_units=num_units,
            num_proj=self.num_proj,
            num_layers_main=num_layers_main,
            num_layers_sub=num_layers_sub,
            lstm_impl=lstm_impl,
            use_peephole=use_peephole,
            parameter_init=parameter_init,
            clip_activation=clip_activation,
            time_major=time_major)
    else:
        raise NotImplementedError
def __init__(self, encoder_type, input_size, num_units, num_layers_main,
             num_layers_sub, num_classes_main, num_classes_sub,
             main_task_weight, lstm_impl='LSTMBlockCell', use_peephole=True,
             splice=1, parameter_init=0.1, clip_grad=None,
             clip_activation=None, num_proj=None, weight_decay=0.0,
             bottleneck_dim=None):
    """Build a multi-task CTC model (older API variant).

    The parent constructor receives the shared model settings; the
    multitask encoder is constructed here with one extra class per task
    for the CTC blank label.

    Raises:
        NotImplementedError: if encoder_type is not one of
            'multitask_blstm' / 'multitask_lstm'
    """
    super(Multitask_CTC, self).__init__(
        input_size, splice, num_classes_main, num_classes_sub,
        main_task_weight, lstm_impl, clip_grad, weight_decay)

    self.name = encoder_type + '_ctc'
    # BUG FIX: the original condition `if ['multitask_blstm',
    # 'multitask_lstm']:` evaluated a non-empty list literal (always
    # True), so the NotImplementedError branch could never fire. Check
    # encoder_type membership instead.
    if encoder_type in ['multitask_blstm', 'multitask_lstm']:
        self.encoder = load(encoder_type)(
            num_units=num_units,
            num_layers_main=num_layers_main,
            num_layers_sub=num_layers_sub,
            num_classes_main=num_classes_main + 1,  # + blank label
            num_classes_sub=num_classes_sub + 1,  # + blank label
            lstm_impl=lstm_impl,
            use_peephole=use_peephole,
            parameter_init=parameter_init,
            clip_activation=clip_activation,
            num_proj=num_proj,
            bottleneck_dim=bottleneck_dim)
    else:
        raise NotImplementedError
def check(self, encoder_type, lstm_impl=None, time_major=False):
    """Instantiate the given encoder, run one forward pass on synthetic
    batch data in a fresh TF graph, and assert the shapes of the encoder
    outputs and final states.

    Args:
        encoder_type (str): which encoder to build (e.g. 'blstm', 'gru',
            'vgg_lstm', 'multitask_blstm', 'cldnn_wang', 'cnn_zhang', ...)
        lstm_impl (str, optional): LSTM cell implementation forwarded to
            LSTM-based encoders
        time_major (bool): if True, outputs are expected time-major and
            are transposed back to batch-major before the shape checks
    """
    print('==================================================')
    print(' encoder_type: %s' % encoder_type)
    print(' lstm_impl: %s' % lstm_impl)
    print(' time_major: %s' % time_major)
    print('==================================================')

    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Load batch data
        batch_size = 4
        # CNN-style / cldnn encoders consume spliced frame windows; the
        # plain RNN encoders take single frames (splice=1).
        splice = 5 if encoder_type in ['vgg_blstm', 'vgg_lstm', 'vgg_wang',
                                       'resnet_wang', 'cldnn_wang',
                                       'cnn_zhang'] else 1
        num_stack = 2
        inputs, _, inputs_seq_len = generate_data(
            label_type='character',
            model='ctc',
            batch_size=batch_size,
            num_stack=num_stack,
            splice=splice)
        frame_num, input_size = inputs[0].shape

        # Define model graph
        if encoder_type in ['blstm', 'lstm']:
            encoder = load(encoder_type)(
                num_units=256,
                num_proj=None,
                num_layers=5,
                lstm_impl=lstm_impl,
                use_peephole=True,
                parameter_init=0.1,
                clip_activation=5,
                time_major=time_major)
        elif encoder_type in ['bgru', 'gru']:
            encoder = load(encoder_type)(
                num_units=256,
                num_layers=5,
                parameter_init=0.1,
                time_major=time_major)
        elif encoder_type in ['vgg_blstm', 'vgg_lstm', 'cldnn_wang']:
            # input_size here is the per-frame feature size before
            # stacking/splicing, recovered by dividing them back out.
            encoder = load(encoder_type)(
                input_size=input_size // splice // num_stack,
                splice=splice,
                num_stack=num_stack,
                num_units=256,
                num_proj=None,
                num_layers=5,
                lstm_impl=lstm_impl,
                use_peephole=True,
                parameter_init=0.1,
                clip_activation=5,
                time_major=time_major)
        elif encoder_type in ['multitask_blstm', 'multitask_lstm']:
            encoder = load(encoder_type)(
                num_units=256,
                num_proj=None,
                num_layers_main=5,
                num_layers_sub=3,
                lstm_impl=lstm_impl,
                use_peephole=True,
                parameter_init=0.1,
                clip_activation=5,
                time_major=time_major)
        elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
            encoder = load(encoder_type)(
                input_size=input_size // splice // num_stack,
                splice=splice,
                num_stack=num_stack,
                parameter_init=0.1,
                time_major=time_major)
            # NOTE: topology is pre-defined
        else:
            raise NotImplementedError

        # Create placeholders
        inputs_pl = tf.placeholder(tf.float32,
                                   shape=[None, None, input_size],
                                   name='inputs')
        inputs_seq_len_pl = tf.placeholder(tf.int32,
                                           shape=[None],
                                           name='inputs_seq_len')
        keep_prob_pl = tf.placeholder(tf.float32, name='keep_prob')

        # operation for forward computation
        if encoder_type in ['multitask_blstm', 'multitask_lstm']:
            # Multitask encoders also return the sub-task states.
            hidden_states_op, final_state_op, hidden_states_sub_op, final_state_sub_op = encoder(
                inputs=inputs_pl,
                inputs_seq_len=inputs_seq_len_pl,
                keep_prob=keep_prob_pl,
                is_training=True)
        else:
            hidden_states_op, final_state_op = encoder(
                inputs=inputs_pl,
                inputs_seq_len=inputs_seq_len_pl,
                keep_prob=keep_prob_pl,
                is_training=True)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" %
                  (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        # Make feed dict
        feed_dict = {
            inputs_pl: inputs,
            inputs_seq_len_pl: inputs_seq_len,
            keep_prob_pl: 0.9
        }

        with tf.Session() as sess:
            # Initialize parameters
            sess.run(init_op)

            # Make prediction
            if encoder_type in ['multitask_blstm', 'multitask_lstm']:
                encoder_outputs, final_state, hidden_states_sub, final_state_sub = sess.run(
                    [hidden_states_op, final_state_op,
                     hidden_states_sub_op, final_state_sub_op],
                    feed_dict=feed_dict)
            elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
                # CNN encoders have no recurrent final state.
                encoder_outputs = sess.run(
                    hidden_states_op, feed_dict=feed_dict)
            else:
                encoder_outputs, final_state = sess.run(
                    [hidden_states_op, final_state_op],
                    feed_dict=feed_dict)

            # Convert always to batch-major
            if time_major:
                encoder_outputs = encoder_outputs.transpose(1, 0, 2)

            if encoder_type in ['blstm', 'bgru', 'vgg_blstm',
                                'multitask_blstm', 'cldnn_wang']:
                if encoder_type != 'cldnn_wang':
                    # Bidirectional outputs: forward and backward halves
                    # concatenated, hence num_units * 2.
                    self.assertEqual(
                        (batch_size, frame_num, encoder.num_units * 2),
                        encoder_outputs.shape)

                if encoder_type != 'bgru':
                    # LSTM-style final states expose c/h per direction.
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].c.shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].h.shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[1].c.shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[1].h.shape)

                    if encoder_type == 'multitask_blstm':
                        self.assertEqual(
                            (batch_size, frame_num, encoder.num_units * 2),
                            hidden_states_sub.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[0].c.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[0].h.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[1].c.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[1].h.shape)
                else:
                    # 'bgru' final states are indexed directly per
                    # direction (no c/h attributes).
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[1].shape)

            elif encoder_type in ['lstm', 'gru', 'vgg_lstm',
                                  'multitask_lstm']:
                # Unidirectional outputs: last dim is num_units.
                self.assertEqual(
                    (batch_size, frame_num, encoder.num_units),
                    encoder_outputs.shape)

                if encoder_type != 'gru':
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].c.shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].h.shape)

                    if encoder_type == 'multitask_lstm':
                        self.assertEqual(
                            (batch_size, frame_num, encoder.num_units),
                            hidden_states_sub.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[0].c.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[0].h.shape)
                else:
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].shape)

            elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
                # CNN encoders: only check rank and (batch, time) prefix.
                self.assertEqual(3, len(encoder_outputs.shape))
                self.assertEqual(
                    (batch_size, frame_num), encoder_outputs.shape[:2])
def __init__(self, encoder_type, input_size, num_units, num_layers,
             num_classes, lstm_impl='LSTMBlockCell', use_peephole=True,
             splice=1, parameter_init=0.1, clip_grad_norm=None,
             clip_activation=None, num_proj=None, weight_decay=0.0,
             bottleneck_dim=None, time_major=True):
    """Base CTC model: validate and store hyperparameters, build encoder.

    Args:
        encoder_type (str): encoder name; known RNN/CNN variants are
            dispatched below, any other value leaves self.encoder as None
        input_size (int): feature dimension; must be divisible by 3
            (features + delta + double delta)
        num_units (int): units per encoder layer
        num_layers (int): number of encoder layers
        num_classes (int): number of output classes (a blank is added)
        lstm_impl (str): LSTM cell implementation
        use_peephole (bool): use peephole connections in LSTM cells
        splice (int): number of spliced frames; must be odd
        parameter_init (float): range of uniform weight initialization
        clip_grad_norm (float, optional): gradient-norm clipping threshold
        clip_activation (float, optional): LSTM activation clipping value
        num_proj (int or None): LSTM projection size; None or 0 disables
        weight_decay (float): L2 regularization strength (>= 0)
        bottleneck_dim (int, optional): bottleneck layer size
        time_major (bool): use time-major tensors in the encoder
    """
    super(CTC, self).__init__()

    assert input_size % 3 == 0, 'input_size must be divisible by 3 (+ delta, double delta features).'
    assert splice % 2 == 1, 'splice must be the odd number'
    if clip_grad_norm is not None:
        assert float(
            clip_grad_norm) > 0, 'clip_grad_norm must be larger than 0.'
    assert float(
        weight_decay) >= 0, 'weight_decay must not be a negative value.'

    # Encoder setting
    self.encoder_type = encoder_type
    self.input_size = input_size
    self.splice = splice
    self.num_units = num_units
    # BUG FIX: the original evaluated int(num_proj) before checking for
    # None, so the default num_proj=None raised TypeError. Treat both
    # None and 0 as "no projection layer".
    if num_proj is None or int(num_proj) == 0:
        self.num_proj = None
    else:
        self.num_proj = int(num_proj)
    self.num_layers = num_layers
    self.bottleneck_dim = bottleneck_dim
    self.num_classes = num_classes + 1  # + blank
    self.lstm_impl = lstm_impl
    self.use_peephole = use_peephole

    # Regularization
    self.parameter_init = parameter_init
    self.clip_grad_norm = clip_grad_norm
    self.clip_activation = clip_activation
    self.weight_decay = weight_decay

    # Summaries for TensorBoard
    self.summaries_train = []
    self.summaries_dev = []

    # Placeholders
    self.inputs_pl_list = []
    self.labels_pl_list = []
    self.inputs_seq_len_pl_list = []
    self.keep_prob_pl_list = []

    self.time_major = time_major
    self.name = encoder_type + '_ctc'

    if encoder_type in ['blstm', 'lstm']:
        self.encoder = load(encoder_type)(
            num_units=num_units,
            num_proj=self.num_proj,
            num_layers=num_layers,
            lstm_impl=lstm_impl,
            use_peephole=use_peephole,
            parameter_init=parameter_init,
            clip_activation=clip_activation,
            time_major=time_major)
    elif encoder_type in ['vgg_blstm', 'vgg_lstm']:
        self.encoder = load(encoder_type)(
            input_size=input_size,
            splice=splice,
            num_units=num_units,
            num_proj=self.num_proj,
            num_layers=num_layers,
            lstm_impl=lstm_impl,
            use_peephole=use_peephole,
            parameter_init=parameter_init,
            clip_activation=clip_activation,
            time_major=time_major)
    elif encoder_type in ['bgru', 'gru']:
        self.encoder = load(encoder_type)(
            num_units=num_units,
            num_layers=num_layers,
            parameter_init=parameter_init,
            time_major=time_major)
    elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
        self.encoder = load(encoder_type)(
            input_size=input_size,
            splice=splice,
            parameter_init=parameter_init,
            time_major=time_major)
    else:
        # NOTE(review): unknown encoder types deliberately leave the
        # encoder unset instead of raising — presumably a subclass fills
        # it in; kept as-is to preserve behavior.
        self.encoder = None
def check_encode(self, encoder_type, lstm_impl=None):
    """Instantiate the given encoder, run one forward pass on synthetic
    batch data in a fresh TF graph, and assert the shapes of hidden
    states and final states (older keep_prob_input/hidden/output API).

    Args:
        encoder_type (str): which encoder to build (e.g. 'blstm', 'gru',
            'vgg_lstm', 'multitask_blstm', 'cnn_zhang', ...)
        lstm_impl (str, optional): LSTM cell implementation forwarded to
            LSTM-based encoders
    """
    print('==================================================')
    print(' encoder_type: %s' % encoder_type)
    print(' lstm_impl: %s' % lstm_impl)
    print('==================================================')

    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Load batch data
        batch_size = 4
        # CNN-style encoders consume an 11-frame splice window; the plain
        # RNN encoders take single frames (splice=1).
        splice = 11 if encoder_type in ['vgg_blstm', 'vgg_lstm',
                                        'vgg_wang', 'resnet_wang',
                                        'cnn_zhang'] else 1
        inputs, _, inputs_seq_len = generate_data(
            label_type='character',
            model='ctc',
            batch_size=batch_size,
            splice=splice)
        frame_num, input_size = inputs[0].shape

        # Define model graph
        if encoder_type in ['blstm', 'lstm']:
            encoder = load(encoder_type)(
                num_units=256,
                num_layers=5,
                num_classes=0,  # return hidden states
                lstm_impl=lstm_impl,
                parameter_init=0.1)
        elif encoder_type in ['bgru', 'gru']:
            encoder = load(encoder_type)(
                num_units=256,
                num_layers=5,
                num_classes=0,  # return hidden states
                parameter_init=0.1)
        elif encoder_type in ['vgg_blstm', 'vgg_lstm']:
            # Per-frame feature size recovered by dividing out the splice.
            encoder = load(encoder_type)(
                input_size=input_size // 11,
                splice=11,
                num_units=256,
                num_layers=5,
                num_classes=0,  # return hidden states
                lstm_impl=lstm_impl,
                parameter_init=0.1)
        elif encoder_type in ['multitask_blstm', 'multitask_lstm']:
            encoder = load(encoder_type)(
                num_units=256,
                num_layers_main=5,
                num_layers_sub=3,
                num_classes_main=0,  # return hidden states
                num_classes_sub=0,  # return hidden states
                lstm_impl=lstm_impl,
                parameter_init=0.1)
        elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
            encoder = load(encoder_type)(
                input_size=input_size // 11,
                splice=11,
                num_classes=27,
                parameter_init=0.1)
            # NOTE: topology is pre-defined
        else:
            raise NotImplementedError

        # Create placeholders
        inputs_pl = tf.placeholder(tf.float32,
                                   shape=[None, None, input_size],
                                   name='inputs')
        inputs_seq_len_pl = tf.placeholder(tf.int32,
                                           shape=[None],
                                           name='inputs_seq_len')
        keep_prob_input_pl = tf.placeholder(tf.float32,
                                            name='keep_prob_input')
        keep_prob_hidden_pl = tf.placeholder(tf.float32,
                                             name='keep_prob_hidden')
        keep_prob_output_pl = tf.placeholder(tf.float32,
                                             name='keep_prob_output')

        # operation for forward computation
        if encoder_type in ['multitask_blstm', 'multitask_lstm']:
            # Multitask encoders also return the sub-task states.
            hidden_states_op, final_state_op, hidden_states_sub_op, final_state_sub_op = encoder(
                inputs=inputs_pl,
                inputs_seq_len=inputs_seq_len_pl,
                keep_prob_input=keep_prob_input_pl,
                keep_prob_hidden=keep_prob_hidden_pl,
                keep_prob_output=keep_prob_output_pl)
        else:
            hidden_states_op, final_state_op = encoder(
                inputs=inputs_pl,
                inputs_seq_len=inputs_seq_len_pl,
                keep_prob_input=keep_prob_input_pl,
                keep_prob_hidden=keep_prob_hidden_pl,
                keep_prob_output=keep_prob_output_pl)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" %
                  (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()),
               "{:,}".format(total_parameters / 1000000)))

        # Make feed dict
        feed_dict = {
            inputs_pl: inputs,
            inputs_seq_len_pl: inputs_seq_len,
            keep_prob_input_pl: 0.9,
            keep_prob_hidden_pl: 0.9,
            keep_prob_output_pl: 1.0
        }

        with tf.Session() as sess:
            # Initialize parameters
            sess.run(init_op)

            # Make prediction
            if encoder_type in ['multitask_blstm', 'multitask_lstm']:
                hidden_states, final_state, hidden_states_sub, final_state_sub = sess.run(
                    [hidden_states_op, final_state_op,
                     hidden_states_sub_op, final_state_sub_op],
                    feed_dict=feed_dict)
            elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
                # CNN encoders have no recurrent final state.
                hidden_states = sess.run(
                    hidden_states_op, feed_dict=feed_dict)
            else:
                hidden_states, final_state = sess.run(
                    [hidden_states_op, final_state_op],
                    feed_dict=feed_dict)

            if encoder_type in ['blstm', 'bgru', 'vgg_blstm',
                                'multitask_blstm']:
                # Bidirectional outputs: forward and backward halves
                # concatenated, hence num_units * 2.
                self.assertEqual(
                    (batch_size, frame_num, encoder.num_units * 2),
                    hidden_states.shape)

                if encoder_type in ['blstm', 'vgg_blstm',
                                    'multitask_blstm']:
                    # LSTM-style final states expose c/h per direction.
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].c.shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].h.shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[1].c.shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[1].h.shape)

                    if encoder_type == 'multitask_blstm':
                        self.assertEqual(
                            (batch_size, frame_num, encoder.num_units * 2),
                            hidden_states_sub.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[0].c.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[0].h.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[1].c.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[1].h.shape)
                else:
                    # 'bgru' final states are indexed directly per
                    # direction (no c/h attributes).
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[1].shape)

            elif encoder_type in ['lstm', 'gru', 'vgg_lstm']:
                # Unidirectional outputs: last dim is num_units.
                self.assertEqual(
                    (batch_size, frame_num, encoder.num_units),
                    hidden_states.shape)

                # NOTE(review): 'multitask_lstm' below is unreachable —
                # the enclosing elif does not include it; confirm intent.
                if encoder_type in ['lstm', 'vgg_lstm', 'multitask_lstm']:
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].c.shape)
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].h.shape)

                    if encoder_type == 'multitask_lstm':
                        self.assertEqual(
                            (batch_size, frame_num, encoder.num_units),
                            hidden_states_sub.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[0].c.shape)
                        self.assertEqual(
                            (batch_size, encoder.num_units),
                            final_state_sub[0].h.shape)
                else:
                    self.assertEqual(
                        (batch_size, encoder.num_units),
                        final_state[0].shape)

            elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
                # CNN encoders here emit time-major logits over
                # num_classes.
                self.assertEqual(
                    (frame_num, batch_size, encoder.num_classes),
                    hidden_states.shape)
def __init__(self, encoder_type, input_size, num_units, num_layers,
             num_classes, lstm_impl='LSTMBlockCell', use_peephole=True,
             splice=1, num_stack=1, parameter_init=0.1,
             clip_grad_norm=None, clip_activation=None, num_proj=None,
             weight_decay=0.0, bottleneck_dim=None, time_major=True):
    """Base CTC model (with frame stacking): validate and store
    hyperparameters and build the requested encoder.

    Args:
        encoder_type (str): encoder name dispatched below
        input_size (int): feature dimension; must be divisible by 3
            (features + delta + acceleration)
        num_units (int): units per encoder layer
        num_layers (int): number of encoder layers
        num_classes (int): number of output classes (a blank is added)
        lstm_impl (str): LSTM cell implementation
        use_peephole (bool): use peephole connections in LSTM cells
        splice (int): number of spliced frames; must be odd
        num_stack (int): number of stacked frames
        parameter_init (float): range of uniform weight initialization
        clip_grad_norm (float, optional): gradient-norm clipping threshold
        clip_activation (float, optional): LSTM activation clipping value
        num_proj (int or None): LSTM projection size; None or 0 disables
        weight_decay (float): L2 regularization strength (>= 0)
        bottleneck_dim (int, optional): bottleneck layer size
        time_major (bool): use time-major tensors in the encoder

    Raises:
        NotImplementedError: for an unsupported encoder_type
    """
    super(CTC, self).__init__()

    assert input_size % 3 == 0, 'input_size must be divisible by 3 (+ delta, acceleration coefficients).'
    assert splice % 2 == 1, 'splice must be the odd number'
    if clip_grad_norm is not None:
        assert float(
            clip_grad_norm) > 0, 'clip_grad_norm must be larger than 0.'
    assert float(
        weight_decay) >= 0, 'weight_decay must not be a negative value.'

    # Encoder setting
    self.encoder_type = encoder_type
    self.input_size = input_size
    self.splice = splice
    self.num_stack = num_stack
    self.num_units = num_units
    # BUG FIX: the original evaluated int(num_proj) before checking for
    # None, so the default num_proj=None raised TypeError. Treat both
    # None and 0 as "no projection layer".
    if num_proj is None or int(num_proj) == 0:
        self.num_proj = None
    else:
        self.num_proj = int(num_proj)
    self.num_layers = num_layers
    self.bottleneck_dim = bottleneck_dim
    self.num_classes = num_classes + 1  # + blank
    self.lstm_impl = lstm_impl
    self.use_peephole = use_peephole

    # Regularization
    self.parameter_init = parameter_init
    self.clip_grad_norm = clip_grad_norm
    self.clip_activation = clip_activation
    self.weight_decay = weight_decay

    # Summaries for TensorBoard
    self.summaries_train = []
    self.summaries_dev = []

    # Placeholders
    self.inputs_pl_list = []
    self.labels_pl_list = []
    self.inputs_seq_len_pl_list = []
    self.keep_prob_pl_list = []

    self.time_major = time_major
    self.name = encoder_type + '_ctc'

    if encoder_type in ['blstm', 'lstm']:
        self.encoder = load(encoder_type)(
            num_units=num_units,
            num_proj=self.num_proj,
            num_layers=num_layers,
            lstm_impl=lstm_impl,
            use_peephole=use_peephole,
            parameter_init=parameter_init,
            clip_activation=clip_activation,
            time_major=time_major)
    elif encoder_type in ['vgg_blstm', 'vgg_lstm', 'cldnn_wang']:
        self.encoder = load(encoder_type)(
            input_size=input_size,
            splice=splice,
            num_stack=num_stack,
            num_units=num_units,
            num_proj=self.num_proj,
            num_layers=num_layers,
            lstm_impl=lstm_impl,
            use_peephole=use_peephole,
            parameter_init=parameter_init,
            clip_activation=clip_activation,
            time_major=time_major)
    elif encoder_type in ['bgru', 'gru']:
        self.encoder = load(encoder_type)(
            num_units=num_units,
            num_layers=num_layers,
            parameter_init=parameter_init,
            time_major=time_major)
    elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
        self.encoder = load(encoder_type)(
            input_size=input_size,
            splice=splice,
            num_stack=num_stack,
            parameter_init=parameter_init,
            time_major=time_major)
    elif encoder_type in ['student_cnn_ctc', 'student_cnn_compact_ctc']:
        self.encoder = load(encoder_type)(
            input_size=input_size,
            splice=splice,
            num_stack=num_stack,
            parameter_init=parameter_init,
            time_major=time_major)
    else:
        raise NotImplementedError
def __init__(self, encoder_type, input_size, num_units, num_layers,
             num_classes, lstm_impl='LSTMBlockCell', use_peephole=True,
             splice=1, parameter_init=0.1, clip_grad=None,
             clip_activation=None, num_proj=None, weight_decay=0.0,
             bottleneck_dim=None):
    """Set up the base CTC model and instantiate the requested encoder.

    The parent constructor receives the generic model settings; the
    encoder class resolved via load(encoder_type) receives the
    encoder-specific ones, with one extra output class for the CTC blank
    label.

    Raises:
        NotImplementedError: for an unsupported encoder_type
    """
    super(CTC, self).__init__(
        input_size, splice, num_classes, lstm_impl, clip_grad,
        weight_decay)

    self.name = encoder_type + '_ctc'

    # Keyword arguments shared by every recurrent encoder.
    recurrent_kwargs = dict(
        num_units=num_units,
        num_layers=num_layers,
        num_classes=num_classes + 1,
        parameter_init=parameter_init,
        bottleneck_dim=bottleneck_dim)
    # Extra settings only the LSTM-based encoders accept.
    lstm_kwargs = dict(
        recurrent_kwargs,
        lstm_impl=lstm_impl,
        use_peephole=use_peephole,
        clip_activation=clip_activation,
        num_proj=num_proj)

    if encoder_type in ['blstm', 'lstm']:
        self.encoder = load(encoder_type)(**lstm_kwargs)
    elif encoder_type in ['vgg_blstm', 'vgg_lstm']:
        self.encoder = load(encoder_type)(
            input_size=input_size, splice=splice, **lstm_kwargs)
    elif encoder_type in ['bgru', 'gru']:
        self.encoder = load(encoder_type)(**recurrent_kwargs)
    elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
        # CNN encoders: topology-specific, no recurrent settings.
        self.encoder = load(encoder_type)(
            input_size=input_size,
            splice=splice,
            num_classes=num_classes + 1,
            parameter_init=parameter_init)
    else:
        raise NotImplementedError
def check(self, encoder_type, lstm_impl=None, time_major=False):
    """Build the given encoder, feed one synthetic batch through a fresh
    TF graph, and assert the shapes of the encoder outputs and final
    states.

    Args:
        encoder_type (str): which encoder to build (e.g. 'blstm', 'gru',
            'vgg_lstm', 'multitask_blstm', 'cldnn_wang', 'cnn_zhang', ...)
        lstm_impl (str, optional): LSTM cell implementation forwarded to
            LSTM-based encoders
        time_major (bool): if True, outputs are expected time-major and
            transposed back to batch-major before the shape checks
    """
    print('==================================================')
    print(' encoder_type: %s' % encoder_type)
    print(' lstm_impl: %s' % lstm_impl)
    print(' time_major: %s' % time_major)
    print('==================================================')

    tf.reset_default_graph()
    with tf.Graph().as_default():
        # Load batch data
        batch_size = 4
        # Splice only for the CNN-style / cldnn encoders.
        splice = 5 if encoder_type in [
            'vgg_blstm', 'vgg_lstm', 'vgg_wang', 'resnet_wang',
            'cldnn_wang', 'cnn_zhang'
        ] else 1
        num_stack = 2
        inputs, _, inputs_seq_len = generate_data(label_type='character',
                                                  model='ctc',
                                                  batch_size=batch_size,
                                                  num_stack=num_stack,
                                                  splice=splice)
        frame_num, input_size = inputs[0].shape

        # Define model graph
        if encoder_type in ['blstm', 'lstm']:
            encoder = load(encoder_type)(num_units=256,
                                         num_proj=None,
                                         num_layers=5,
                                         lstm_impl=lstm_impl,
                                         use_peephole=True,
                                         parameter_init=0.1,
                                         clip_activation=5,
                                         time_major=time_major)
        elif encoder_type in ['bgru', 'gru']:
            encoder = load(encoder_type)(num_units=256,
                                         num_layers=5,
                                         parameter_init=0.1,
                                         time_major=time_major)
        elif encoder_type in ['vgg_blstm', 'vgg_lstm', 'cldnn_wang']:
            # Per-frame feature size recovered by dividing out the splice
            # and stack factors.
            encoder = load(encoder_type)(input_size=input_size // splice //
                                         num_stack,
                                         splice=splice,
                                         num_stack=num_stack,
                                         num_units=256,
                                         num_proj=None,
                                         num_layers=5,
                                         lstm_impl=lstm_impl,
                                         use_peephole=True,
                                         parameter_init=0.1,
                                         clip_activation=5,
                                         time_major=time_major)
        elif encoder_type in ['multitask_blstm', 'multitask_lstm']:
            encoder = load(encoder_type)(num_units=256,
                                         num_proj=None,
                                         num_layers_main=5,
                                         num_layers_sub=3,
                                         lstm_impl=lstm_impl,
                                         use_peephole=True,
                                         parameter_init=0.1,
                                         clip_activation=5,
                                         time_major=time_major)
        elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
            encoder = load(encoder_type)(input_size=input_size // splice //
                                         num_stack,
                                         splice=splice,
                                         num_stack=num_stack,
                                         parameter_init=0.1,
                                         time_major=time_major)
            # NOTE: topology is pre-defined
        else:
            raise NotImplementedError

        # Create placeholders
        inputs_pl = tf.placeholder(tf.float32,
                                   shape=[None, None, input_size],
                                   name='inputs')
        inputs_seq_len_pl = tf.placeholder(tf.int32,
                                           shape=[None],
                                           name='inputs_seq_len')
        keep_prob_pl = tf.placeholder(tf.float32, name='keep_prob')

        # operation for forward computation
        if encoder_type in ['multitask_blstm', 'multitask_lstm']:
            # Multitask encoders also return the sub-task states.
            hidden_states_op, final_state_op, hidden_states_sub_op, final_state_sub_op = encoder(
                inputs=inputs_pl,
                inputs_seq_len=inputs_seq_len_pl,
                keep_prob=keep_prob_pl,
                is_training=True)
        else:
            hidden_states_op, final_state_op = encoder(
                inputs=inputs_pl,
                inputs_seq_len=inputs_seq_len_pl,
                keep_prob=keep_prob_pl,
                is_training=True)

        # Add the variable initializer operation
        init_op = tf.global_variables_initializer()

        # Count total parameters
        parameters_dict, total_parameters = count_total_parameters(
            tf.trainable_variables())
        for parameter_name in sorted(parameters_dict.keys()):
            print("%s %d" %
                  (parameter_name, parameters_dict[parameter_name]))
        print("Total %d variables, %s M parameters" %
              (len(parameters_dict.keys()), "{:,}".format(
                  total_parameters / 1000000)))

        # Make feed dict
        feed_dict = {
            inputs_pl: inputs,
            inputs_seq_len_pl: inputs_seq_len,
            keep_prob_pl: 0.9
        }

        with tf.Session() as sess:
            # Initialize parameters
            sess.run(init_op)

            # Make prediction
            if encoder_type in ['multitask_blstm', 'multitask_lstm']:
                encoder_outputs, final_state, hidden_states_sub, final_state_sub = sess.run(
                    [
                        hidden_states_op, final_state_op,
                        hidden_states_sub_op, final_state_sub_op
                    ],
                    feed_dict=feed_dict)
            elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
                # CNN encoders have no recurrent final state.
                encoder_outputs = sess.run(hidden_states_op,
                                           feed_dict=feed_dict)
            else:
                encoder_outputs, final_state = sess.run(
                    [hidden_states_op, final_state_op],
                    feed_dict=feed_dict)

            # Convert always to batch-major
            if time_major:
                encoder_outputs = encoder_outputs.transpose(1, 0, 2)

            if encoder_type in [
                    'blstm', 'bgru', 'vgg_blstm', 'multitask_blstm',
                    'cldnn_wang'
            ]:
                if encoder_type != 'cldnn_wang':
                    # Bidirectional outputs: both directions concatenated,
                    # hence num_units * 2.
                    self.assertEqual(
                        (batch_size, frame_num, encoder.num_units * 2),
                        encoder_outputs.shape)

                if encoder_type != 'bgru':
                    # LSTM-style final states expose c/h per direction.
                    self.assertEqual((batch_size, encoder.num_units),
                                     final_state[0].c.shape)
                    self.assertEqual((batch_size, encoder.num_units),
                                     final_state[0].h.shape)
                    self.assertEqual((batch_size, encoder.num_units),
                                     final_state[1].c.shape)
                    self.assertEqual((batch_size, encoder.num_units),
                                     final_state[1].h.shape)

                    if encoder_type == 'multitask_blstm':
                        self.assertEqual(
                            (batch_size, frame_num, encoder.num_units * 2),
                            hidden_states_sub.shape)
                        self.assertEqual((batch_size, encoder.num_units),
                                         final_state_sub[0].c.shape)
                        self.assertEqual((batch_size, encoder.num_units),
                                         final_state_sub[0].h.shape)
                        self.assertEqual((batch_size, encoder.num_units),
                                         final_state_sub[1].c.shape)
                        self.assertEqual((batch_size, encoder.num_units),
                                         final_state_sub[1].h.shape)
                else:
                    # 'bgru' final states are indexed directly per
                    # direction (no c/h attributes).
                    self.assertEqual((batch_size, encoder.num_units),
                                     final_state[0].shape)
                    self.assertEqual((batch_size, encoder.num_units),
                                     final_state[1].shape)

            elif encoder_type in [
                    'lstm', 'gru', 'vgg_lstm', 'multitask_lstm'
            ]:
                # Unidirectional outputs: last dim is num_units.
                self.assertEqual(
                    (batch_size, frame_num, encoder.num_units),
                    encoder_outputs.shape)

                if encoder_type != 'gru':
                    self.assertEqual((batch_size, encoder.num_units),
                                     final_state[0].c.shape)
                    self.assertEqual((batch_size, encoder.num_units),
                                     final_state[0].h.shape)

                    if encoder_type == 'multitask_lstm':
                        self.assertEqual(
                            (batch_size, frame_num, encoder.num_units),
                            hidden_states_sub.shape)
                        self.assertEqual((batch_size, encoder.num_units),
                                         final_state_sub[0].c.shape)
                        self.assertEqual((batch_size, encoder.num_units),
                                         final_state_sub[0].h.shape)
                else:
                    self.assertEqual((batch_size, encoder.num_units),
                                     final_state[0].shape)

            elif encoder_type in ['vgg_wang', 'resnet_wang', 'cnn_zhang']:
                # CNN encoders: only check rank and (batch, time) prefix.
                self.assertEqual(3, len(encoder_outputs.shape))
                self.assertEqual((batch_size, frame_num),
                                 encoder_outputs.shape[:2])