def prediction(self):
    """Build the per-time-step softmax output of a (stacked) LSTM network.

    An LSTM cell with ``self._num_hidden`` units is wrapped with output
    dropout (keep probability ``self.dropout``) and, when
    ``self._num_layers`` is greater than one, stacked with
    ``MultiRNNCell``.  The network is unrolled over ``self.data`` using
    ``self.length`` as the per-example sequence length.  A single
    weight/bias pair obtained from ``self._weight_and_bias`` is applied to
    every time step by flattening the RNN output to two dimensions.

    Returns:
        The softmax tensor reshaped to ``[-1, max_length, num_classes]``,
        both sizes being read from the static shape of ``self.target``
        (axes 1 and 2).
    """
    hidden_size = self._num_hidden
    cell = rnn_cell.DropoutWrapper(
        rnn_cell.LSTMCell(hidden_size), output_keep_prob=self.dropout)
    if self._num_layers > 1:
        # The same wrapped cell object is repeated for every layer.
        cell = rnn_cell.MultiRNNCell([cell] * self._num_layers)
    rnn_output, _ = rnn.dynamic_rnn(
        cell, self.data, dtype=tf.float32, sequence_length=self.length)

    target_shape = self.target.get_shape()
    max_length = int(target_shape[1])
    num_classes = int(target_shape[2])
    weight, bias = self._weight_and_bias(hidden_size, num_classes)

    # Collapse the leading axes so one matmul covers all time steps.
    flat_output = tf.reshape(rnn_output, [-1, hidden_size])
    flat_probs = tf.nn.softmax(tf.matmul(flat_output, weight) + bias)
    return tf.reshape(flat_probs, [-1, max_length, num_classes])
def prediction(self):
    """Build the softmax output computed from the last relevant GRU output.

    A GRU of ``self._num_hidden`` units is unrolled over ``self.data`` with
    ``self.length`` as the per-example sequence length.  The output that
    ``self._last_relevant`` selects for each example is fed through one
    weight/bias pair from ``self._weight_and_bias``.

    Returns:
        The softmax tensor; its class count is the static size of axis 1
        of ``self.target``.
    """
    # Recurrent network.
    # The input is read from the instance; a bare ``data`` name would only
    # resolve if a module-level global of that name happened to exist.
    output, _ = rnn.dynamic_rnn(
        rnn_cell.GRUCell(self._num_hidden),
        self.data,
        dtype=tf.float32,
        sequence_length=self.length,
    )
    last = self._last_relevant(output, self.length)
    # Softmax layer.
    weight, bias = self._weight_and_bias(
        self._num_hidden, int(self.target.get_shape()[1]))
    prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
    return prediction
def prediction(self):
    """Build the softmax output computed from the final step of a stacked GRU.

    A GRU cell of ``self._num_hidden`` units is wrapped with output dropout
    (keep probability ``self.dropout``), repeated ``self._num_layers``
    times in a ``MultiRNNCell`` and unrolled over ``self.data``.  No
    sequence length is passed, so the step that is selected is the last
    index of the output's time axis.

    Returns:
        The softmax tensor; its class count is the static size of axis 1
        of ``self.target``.
    """
    # Recurrent network.
    network = rnn_cell.GRUCell(self._num_hidden)
    network = rnn_cell.DropoutWrapper(
        network, output_keep_prob=self.dropout)
    network = rnn_cell.MultiRNNCell([network] * self._num_layers)
    # The input is read from the instance; a bare ``data`` name would only
    # resolve if a module-level global of that name happened to exist.
    output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
    # Select last output.
    # Swap the two leading axes so the step to pick can be gathered on
    # axis 0 (assumes dynamic_rnn returned batch-major output - TODO confirm).
    output = tf.transpose(output, [1, 0, 2])
    last = tf.gather(output, int(output.get_shape()[0]) - 1)
    # Softmax layer.
    weight, bias = self._weight_and_bias(
        self._num_hidden, int(self.target.get_shape()[1]))
    prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
    return prediction
def prediction(self):
    """Build the per-time-step softmax output of a stacked LSTM network.

    One LSTM cell of ``self._num_hidden`` units, wrapped with output
    dropout (keep probability ``self.dropout``), is repeated
    ``self._num_layers`` times inside a ``MultiRNNCell`` and unrolled over
    ``self.data`` without an explicit sequence length.

    Returns:
        The softmax tensor reshaped to ``[-1, max_length, num_classes]``,
        both sizes being read from the static shape of ``self.target``
        (axes 1 and 2).
    """
    hidden_size = self._num_hidden

    # Recurrent part: dropout-wrapped LSTM, stacked num_layers times.
    layer = rnn_cell.DropoutWrapper(
        rnn_cell.LSTMCell(hidden_size), output_keep_prob=self.dropout)
    stacked = rnn_cell.MultiRNNCell([layer] * self._num_layers)
    rnn_output, _ = rnn.dynamic_rnn(stacked, self.data, dtype=tf.float32)

    # Softmax part: sizes come from the target's static shape.
    target_shape = self.target.get_shape()
    max_length = int(target_shape[1])
    num_classes = int(target_shape[2])
    weight, bias = self._weight_and_bias(hidden_size, num_classes)

    # Flatten so the same weights are applied to every time step, then
    # restore the [.., max_length, num_classes] layout.
    flat_output = tf.reshape(rnn_output, [-1, hidden_size])
    flat_probs = tf.nn.softmax(tf.matmul(flat_output, weight) + bias)
    return tf.reshape(flat_probs, [-1, max_length, num_classes])
def prediction(self):
    """Build the per-time-step softmax output of a stacked GRU network.

    One GRU cell of ``self._num_hidden`` units, wrapped with output dropout
    (keep probability ``self.dropout``), is repeated ``self._num_layers``
    times inside a ``MultiRNNCell`` and unrolled over ``self.data`` without
    an explicit sequence length.

    Returns:
        The softmax tensor reshaped to ``[-1, max_length, num_classes]``,
        both sizes being read from the static shape of ``self.target``
        (axes 1 and 2).
    """
    # Recurrent network.
    network = rnn_cell.GRUCell(self._num_hidden)
    network = rnn_cell.DropoutWrapper(
        network, output_keep_prob=self.dropout)
    network = rnn_cell.MultiRNNCell([network] * self._num_layers)
    # The input is read from the instance; a bare ``data`` name would only
    # resolve if a module-level global of that name happened to exist.
    output, _ = rnn.dynamic_rnn(network, self.data, dtype=tf.float32)
    # Softmax layer.
    max_length = int(self.target.get_shape()[1])
    num_classes = int(self.target.get_shape()[2])
    weight, bias = self._weight_and_bias(self._num_hidden, num_classes)
    # Flatten to apply same weights to all time steps.
    output = tf.reshape(output, [-1, self._num_hidden])
    prediction = tf.nn.softmax(tf.matmul(output, weight) + bias)
    prediction = tf.reshape(prediction, [-1, max_length, num_classes])
    return prediction
def prediction(self):
    """Build the softmax output computed from the last relevant GRU output.

    The result is what the cost / error computations consume (for example
    to compute a cross-entropy or an error-rate loss).

    Returns:
        The softmax tensor; its class count is the static size of axis 1
        of ``self.target``.
    """
    # Recurrent network.
    # The input is read from the instance; a bare ``data`` name would only
    # resolve if a module-level global of that name happened to exist.
    output, _ = rnn.dynamic_rnn(
        rnn_cell.GRUCell(self._num_hidden),
        self.data,
        dtype=tf.float32,
        sequence_length=self.length,
    )
    # When a sequence is passed in for prediction (e.g. after training),
    # the dynamic_rnn output has to go through _last_relevant so that the
    # output belonging to each sequence's own length is used.
    last = self._last_relevant(output, self.length)
    # Softmax layer.
    weight, bias = self._weight_and_bias(
        self._num_hidden, int(self.target.get_shape()[1]))
    prediction = tf.nn.softmax(tf.matmul(last, weight) + bias)
    return prediction
def prediction(self):
    """Build the per-time-step softmax output of a single-layer GRU.

    A GRU of ``self._num_hidden`` units is unrolled over ``self.data``
    with ``self.length`` as the per-example sequence length; one
    weight/bias pair from ``self._weight_and_bias`` is shared by all time
    steps.

    Returns:
        The softmax tensor reshaped to ``[-1, max_length, num_classes]``,
        both sizes being read from the static shape of ``self.target``
        (axes 1 and 2).
    """
    hidden_size = self._num_hidden

    # Recurrent part.
    gru = rnn_cell.GRUCell(hidden_size)
    rnn_output, _ = rnn.dynamic_rnn(
        gru, self.data, dtype=tf.float32, sequence_length=self.length)

    # Softmax part: sizes come from the target's static shape.
    target_shape = self.target.get_shape()
    max_length = int(target_shape[1])
    num_classes = int(target_shape[2])
    weight, bias = self._weight_and_bias(hidden_size, num_classes)

    # Work on a 2-D view so one matmul serves every time step.
    flat_output = tf.reshape(rnn_output, [-1, hidden_size])
    flat_probs = tf.nn.softmax(tf.matmul(flat_output, weight) + bias)
    return tf.reshape(flat_probs, [-1, max_length, num_classes])
def create_rnn(max_steps, n_input, mem_nrow, mem_ncol):
    """Build the NTM training graph and return its endpoints.

    Args:
        max_steps: Not used by this function; kept so existing callers
            keep working.
        n_input: Size of the last axis of the ``x`` and ``y`` placeholders;
            also passed to ``NTMCell`` as both ``n_inputs`` and
            ``n_outputs``.
        mem_nrow: Passed to ``NTMCell`` as ``mem_nrows``.
        mem_ncol: Passed to ``NTMCell`` as ``mem_ncols``.

    Returns:
        A dict with the placeholders (``'x'``, ``'y'``, ``'steps'``), the
        loss tensors (``'cost'``, ``'err'``), the training op
        (``'optimizer'``) and the result of ``predict(outputs, nsteps)``
        (``'pred'``).
    """
    # Batch size, max_steps, n_input
    x = tf.placeholder("float", [None, None, n_input])
    y = tf.placeholder("float", [None, None, n_input])
    nsteps = tf.placeholder("int32")

    ntm_cell = ntm.NTMCell(
        n_inputs=n_input,
        n_outputs=n_input,
        n_hidden=100,
        mem_nrows=mem_nrow,
        mem_ncols=mem_ncol,
        n_heads=1,
    )
    outputs, _ = rnn.dynamic_rnn(
        ntm_cell,
        x,
        dtype=tf.float32,
        sequence_length=nsteps,
    )

    # Loss measures
    cost = var_seq_loss(outputs, y, nsteps)
    err = bits_err_per_seq(outputs, y, nsteps)

    # Optimizer params as described in paper.
    opt = tf.train.RMSPropOptimizer(
        learning_rate=1e-4,
        momentum=0.9,
    )

    # Gradient clipping as described in paper.
    # compute_gradients reports a None gradient for any variable the cost
    # does not depend on; clip_by_value cannot take None, so such pairs are
    # left out (they would contribute no update anyway).
    clipped_gvs = [
        (tf.clip_by_value(g, -10, 10), v)
        for g, v in opt.compute_gradients(cost)
        if g is not None
    ]
    optimizer = opt.apply_gradients(clipped_gvs)

    return {
        'x': x,
        'y': y,
        'steps': nsteps,
        'cost': cost,
        'err': err,
        'optimizer': optimizer,
        'pred': predict(outputs, nsteps),
    }
def __init__(self, num_labels, num_layers, hidden_size, dropout,
             batch_size, learning_rate, lr_decay_factor, grad_clip,
             max_input_seq_length, max_target_seq_length, input_dim,
             forward_only=False):
    '''
    Acoustic rnn model, using ctc loss with lstm cells
    Inputs:
      num_labels - dimension of character input/one hot encoding
                   (width of the output layer)
      num_layers - number of lstm layers
      hidden_size - size of hidden layers
      dropout - value passed unchanged as both input_keep_prob and
                output_keep_prob of the DropoutWrapper, i.e. it is used
                as a keep probability
      batch_size - number of training examples fed at once
      learning_rate - learning rate parameter fed to optimizer
      lr_decay_factor - factor the learning rate is multiplied by each
                        time learning_rate_decay_op is run
      grad_clip - max gradient size (prevent exploding gradients), used
                  as the global-norm clipping threshold
      max_input_seq_length - maximum length of input vector sequence
      max_target_seq_length - maximum length of the target sequence
      input_dim - dimension of input vector
      forward_only - whether to build back prop nodes or not
    '''
    self.dropout = dropout
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * lr_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
    self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
    self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)
    self.max_input_seq_length = max_input_seq_length
    self.max_target_seq_length = max_target_seq_length

    #graph inputs (time-major: [max_input_seq_length, batch, input_dim])
    self.inputs = tf.placeholder(
        tf.float32,
        shape=[self.max_input_seq_length, None, input_dim],
        name="inputs")
    self.input_seq_lengths = tf.placeholder(
        tf.int32, shape=[None], name="input_seq_lengths")
    self.target_seq_lengths = tf.placeholder(
        tf.int32, shape=[None], name="target_seq_lengths")

    #graph sparse tensor inputs
    self.target_indices = tf.placeholder(
        tf.int64, shape=[None, 2], name="target_indices")
    self.target_vals = tf.placeholder(
        tf.int32, shape=[None], name="target_vals")

    #define cells of acoustic model
    # NOTE(review): the dropout wrapper is added even when forward_only is
    # set - confirm callers pass a keep probability of 1.0 for inference.
    cell = rnn_cell.DropoutWrapper(
        rnn_cell.BasicLSTMCell(hidden_size),
        input_keep_prob=self.dropout_keep_prob_lstm_input,
        output_keep_prob=self.dropout_keep_prob_lstm_output)
    if num_layers > 1:
        cell = rnn_cell.MultiRNNCell([cell] * num_layers)

    #build input layer
    w_i = tf.get_variable("input_w", [input_dim, hidden_size])
    b_i = tf.get_variable("input_b", [hidden_size])

    #make rnn inputs
    # Only the split time axis (axis 0, size 1) is squeezed. A bare
    # tf.squeeze would also drop the batch or feature axis whenever that
    # axis has size 1, leaving xw_plus_b with a non-matrix input.
    inputs = [tf.nn.xw_plus_b(tf.squeeze(i, squeeze_dims=[0]), w_i, b_i)
              for i in tf.split(0, self.max_input_seq_length, self.inputs)]

    #set rnn init state to 0s
    initial_state = cell.zero_state(self.batch_size, tf.float32)

    #build rnn
    rnn_output, self.hidden_state = rnn.dynamic_rnn(
        cell, tf.pack(inputs),
        sequence_length=self.input_seq_lengths,
        initial_state=initial_state,
        time_major=True,
        parallel_iterations=100)

    #build output layer
    w_o = tf.get_variable("output_w", [hidden_size, num_labels])
    b_o = tf.get_variable("output_b", [num_labels])

    #compute logits (a python list with one tensor per time step)
    self.logits = [tf.nn.xw_plus_b(tf.squeeze(i, squeeze_dims=[0]), w_o, b_o)
                   for i in tf.split(0, self.max_input_seq_length, rnn_output)]

    #setup sparse tensor for input into ctc loss
    sparse_labels = tf.SparseTensor(
        indices=self.target_indices,
        values=self.target_vals,
        shape=[self.batch_size, self.max_target_seq_length])

    #compute ctc loss
    self.ctc_loss = ctc.ctc_loss(tf.pack(self.logits), sparse_labels,
                                 self.input_seq_lengths)
    self.mean_loss = tf.reduce_mean(self.ctc_loss)

    if not forward_only:
        # Back prop nodes: plain SGD with global-norm gradient clipping.
        params = tf.trainable_variables()
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.ctc_loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(gradients, grad_clip)
        self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                          global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())
def build_model(self):
    """Build the whole graph: inputs, embeddings, encoder, decoder, loss.

    The first argument (``arg1``) is encoded by a forward and a backward
    LSTM; the second argument (``arg2``) is consumed by a decoder whose
    per-step input is produced by the nested ``attention`` function.  The
    length-normalised sums of decoder and encoder outputs are concatenated
    and projected to ``self.data_loader.sense_vocab_size`` logits.

    Sets the following attributes on ``self``: the placeholders ``sense``,
    ``arg1``, ``arg2``, ``arg1_len``, ``arg2_len``, ``keep_prob`` and
    ``mask``, plus ``output``, ``accuracy``, ``loss``, ``optimizer`` and
    ``train_op``.
    """
    with tf.variable_scope('RNNTEST'):
        self.sense = tf.placeholder(tf.int32, [None])
        # The last axis carries four index channels, consumed below in the
        # order NER, lemma, word, POS.
        self.arg1 = tf.placeholder(tf.int32, [None, None, 4])
        self.arg2 = tf.placeholder(tf.int32, [None, None, 4])
        self.arg1_len = tf.placeholder(tf.int32, [None])
        self.arg2_len = tf.placeholder(tf.int32, [None])
        self.keep_prob = tf.placeholder(tf.float32)
        arg1_list = tf.split(2, 4, self.arg1)
        arg2_list = tf.split(2, 4, self.arg2)

        # NOTE(review): the extent of this device block was reconstructed
        # from whitespace-collapsed source - confirm it should cover the
        # embedding lookups as well as the embedding variables.
        with tf.device('/cpu:0'):
            # An embedding table is only created when its size is positive.
            NER_W = tf.get_variable('NER_embed', [
                self.data_loader.NER_vocab_size, self.NER_embed_size
            ]) if self.NER_embed_size > 0 else None
            lemma_W = tf.get_variable('lemma_embed', [
                self.data_loader.lemma_vocab_size, self.lemma_embed_size
            ]) if self.lemma_embed_size > 0 else None
            if self.use_pre_trained_embedding:
                word_W = tf.get_variable(
                    'word_embed',
                    initializer=tf.convert_to_tensor(
                        self.data_loader.pre_trained_word_embeddings,
                        dtype=tf.float32)
                ) if self.word_embed_size > 0 else None
            else:
                word_W = tf.get_variable(
                    'word_embed',
                    shape=[
                        self.data_loader.word_vocab_size,
                        self.word_embed_size
                    ]) if self.word_embed_size > 0 else None
            POS_W = tf.get_variable('POS_embed', [
                self.data_loader.POS_vocab_size, self.POS_embed_size
            ]) if self.POS_embed_size > 0 else None

            arg1_embed_list = []
            arg2_embed_list = []
            for idx, W in enumerate([NER_W, lemma_W, word_W, POS_W]):
                if W is not None:
                    arg1_embed_list.append(
                        tf.nn.embedding_lookup(
                            W, tf.squeeze(arg1_list[idx], [2])))
                    arg2_embed_list.append(
                        tf.nn.embedding_lookup(
                            W, tf.squeeze(arg2_list[idx], [2])))

        arg1 = tf.nn.dropout(tf.concat(2, arg1_embed_list), self.keep_prob)
        arg2 = tf.nn.dropout(tf.concat(2, arg2_embed_list), self.keep_prob)

        encoder_lstm_unit = rnn_cell.BasicLSTMCell(self.encoder_size)
        decoder_lstm_unit = rnn_cell.BasicLSTMCell(self.decoder_size)

        with tf.variable_scope('forward_encoder'):
            forward_encoder_outputs, forward_encoder_state = rnn.dynamic_rnn(
                encoder_lstm_unit, arg1, self.arg1_len, dtype=tf.float32)
        with tf.variable_scope('backward_encoder'):
            # The sequence length is passed here as well.  Without it the
            # backward pass keeps running over the padding that follows
            # the reversed sequence, which changes the final state handed
            # to the decoder and leaves non-zero outputs at padded steps
            # that the unmasked reduce_sum over encoder_outputs below would
            # add in.
            backward_encoder_outputs, backward_encoder_state = rnn.dynamic_rnn(
                encoder_lstm_unit,
                tf.reverse_sequence(arg1,
                                    tf.cast(self.arg1_len, tf.int64), 1),
                self.arg1_len,
                dtype=tf.float32)

        # Undo the reversal so both directions are aligned step by step.
        encoder_outputs = tf.concat(2, [
            forward_encoder_outputs,
            tf.reverse_sequence(backward_encoder_outputs,
                                tf.cast(self.arg1_len, tf.int64), 1)
        ])
        encoder_state = tf.concat(
            1, [forward_encoder_state, backward_encoder_state])

        source = tf.expand_dims(
            encoder_outputs,
            2)  #batch_size x source_len x 1 x source_depth(2*encoder_size)
        attention_W = tf.get_variable(
            'attention_W',
            [1, 1, 2 * self.encoder_size, self.attention_judge_size])
        attention_V = tf.get_variable('attention_V',
                                      [self.attention_judge_size])
        # A 1x1 convolution projects every source step with attention_W.
        WxH = tf.nn.conv2d(
            source, attention_W, [1, 1, 1, 1],
            'SAME')  #batch_size x source_len x 1 x attention
        self.mask = tf.placeholder(tf.float32, [None, None])

        def attention(input_t, output_t_minus_1, time):
            """Return the decoder input for one step (``time`` is unused)."""
            with tf.variable_scope('attention'):
                VxS = tf.reshape(
                    rnn_cell.linear(output_t_minus_1,
                                    self.attention_judge_size, True),
                    [-1, 1, 1, self.attention_judge_size
                     ])  #batch_size x 1 x 1 x attention
                _exp = tf.exp(
                    tf.reduce_sum(attention_V * tf.tanh(WxH + VxS),
                                  [3]))  #batch_size x source_len x 1
                # Masked softmax: masked-out source steps get zero weight.
                _exp = _exp * tf.expand_dims(self.mask, -1)
                attention_weight = _exp / tf.reduce_sum(
                    _exp, [1], keep_dims=True)
                attention_t = tf.reduce_sum(
                    encoder_outputs * attention_weight, [1])
            # NOTE(review): nesting reconstructed from collapsed source.
            # This second rnn_cell.linear call is kept outside the
            # 'attention' scope on the assumption that two linear calls in
            # one scope would ask for the same variable names - confirm.
            feed_in_t = tf.tanh(
                rnn_cell.linear([attention_t, input_t],
                                self.embedding_size, True))
            return feed_in_t

        with tf.variable_scope('decoder'):
            decoder_outputs, decoder_state = dynamic_rnn_decoder(
                arg2,
                decoder_lstm_unit,
                initial_state=encoder_state,
                sequence_length=self.arg2_len,
                loop_function=attention)

        # Length-normalised sums of the decoder and encoder outputs.
        judge = tf.concat(1, [
            tf.reduce_sum(decoder_outputs, [1]) /
            tf.expand_dims(tf.cast(self.arg2_len, tf.float32), -1),
            tf.reduce_sum(encoder_outputs, [1]) /
            tf.expand_dims(tf.cast(self.arg1_len, tf.float32), -1)
        ])
        unscaled_log_distribution = rnn_cell.linear(
            judge, self.data_loader.sense_vocab_size, True)
        self.output = tf.cast(
            tf.argmax(unscaled_log_distribution, 1), tf.int32)
        self.accuracy = tf.reduce_mean(
            tf.cast(tf.equal(self.output, self.sense), tf.float32))

        #max-margin method
        #self._MM = tf.placeholder(tf.int32,[None])
        #margin = tf.sub(tf.reduce_max(unscaled_log_distribution,[1]),tf.gather(tf.reshape(unscaled_log_distribution,[-1]),self._MM))
        #self.loss = tf.reduce_mean(margin)

        #maximum likelihood method
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                unscaled_log_distribution, self.sense))
        self.optimizer = tf.train.AdagradOptimizer(self.lr)
        self.train_op = self.optimizer.minimize(self.loss)
def __init__(self, session, num_labels, num_layers, hidden_size, dropout,
             batch_size, learning_rate, lr_decay_factor, grad_clip,
             max_input_seq_length, max_target_seq_length, input_dim,
             forward_only=False, tensorboard_dir=None, tb_run_name=None):
    """
    Build the graph of the CTC-trained LSTM acoustic model.

    Inputs:
      session - tensorflow session; its graph is given to the summary writer
      num_labels - width of the output layer (character one-hot dimension)
      num_layers - number of lstm layers
      hidden_size - size of hidden layers
      dropout - value fed unchanged to the DropoutWrapper as both
                input_keep_prob and output_keep_prob (training only)
      batch_size - number of training examples fed at once
      learning_rate - initial value of the learning rate variable
      lr_decay_factor - factor applied by learning_rate_decay_op
      grad_clip - global-norm threshold used to clip the gradients
      max_input_seq_length - maximum length of input vector sequence
      max_target_seq_length - maximum length of output vector sequence
      input_dim - dimension of input vector
      forward_only - when True no loss, training or error-rate nodes
                     are built and no dropout wrapper is added
      tensorboard_dir - path to tensorboard file (None if not activated)
      tb_run_name - sub-directory name for this run; a timestamp is used
                    when it is None
    """
    # Two separate keys keep the training and the test summaries apart.
    training_key = tf.GraphKeys()
    test_key = tf.GraphKeys()

    self.dropout = dropout
    self.batch_size = batch_size

    self.learning_rate = tf.Variable(
        float(learning_rate), trainable=False, name='learning_rate')
    tf.scalar_summary('Learning rate', self.learning_rate,
                      collections=[training_key, test_key])
    decayed_rate = self.learning_rate * lr_decay_factor
    self.learning_rate_decay_op = self.learning_rate.assign(decayed_rate)
    self.global_step = tf.Variable(0, trainable=False, name='global_step')

    self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
    self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)
    self.max_input_seq_length = max_input_seq_length
    self.max_target_seq_length = max_target_seq_length
    self.tensorboard_dir = tensorboard_dir

    # Data pipes and audio processor start out unset.
    self.train_conn = None
    self.test_conn = None
    self.audio_processor = None

    # Graph inputs (time-major: [max_input_seq_length, batch, input_dim]).
    self.inputs = tf.placeholder(
        tf.float32,
        shape=[self.max_input_seq_length, None, input_dim],
        name="inputs")
    # int16 would use less memory, but CTC needs int32 here.
    self.input_seq_lengths = tf.placeholder(
        tf.int32, shape=[None], name="input_seq_lengths")
    # int16 saves memory; max_target_seq_length must stay below 65535.
    self.target_seq_lengths = tf.placeholder(
        tf.int16, shape=[None], name="target_seq_lengths")

    # Cells of the acoustic model; dropout is only added for training.
    lstm = rnn_cell.BasicLSTMCell(hidden_size, state_is_tuple=True)
    if not forward_only:
        lstm = rnn_cell.DropoutWrapper(
            lstm,
            input_keep_prob=self.dropout_keep_prob_lstm_input,
            output_keep_prob=self.dropout_keep_prob_lstm_output)
    if num_layers > 1:
        lstm = rnn_cell.MultiRNNCell([lstm] * num_layers,
                                     state_is_tuple=True)

    # Input layer parameters.
    with tf.name_scope('Input_Layer'):
        input_stddev = np.sqrt(2.0 / (2 * hidden_size))
        input_w = tf.Variable(
            tf.truncated_normal([input_dim, hidden_size],
                                stddev=input_stddev),
            name="input_w")
        input_b = tf.Variable(tf.zeros([hidden_size]), name="input_b")

    # Project every time step separately, then re-stack them for the rnn.
    input_steps = tf.split(0, self.max_input_seq_length, self.inputs)
    projected_steps = [
        tf.matmul(tf.squeeze(step, squeeze_dims=[0]), input_w) + input_b
        for step in input_steps
    ]

    # The rnn starts from an all-zero state.
    zero_state = lstm.zero_state(self.batch_size, tf.float32)

    with tf.name_scope('Dynamic_rnn'):
        rnn_output, self.hidden_state = rnn.dynamic_rnn(
            lstm,
            tf.pack(projected_steps),
            sequence_length=self.input_seq_lengths,
            initial_state=zero_state,
            time_major=True,
            parallel_iterations=1000)

    # Output layer parameters.
    with tf.name_scope('Output_layer'):
        output_stddev = np.sqrt(2.0 / (2 * num_labels))
        output_w = tf.Variable(
            tf.truncated_normal([hidden_size, num_labels],
                                stddev=output_stddev),
            name="output_w")
        output_b = tf.Variable(tf.zeros([num_labels]), name="output_b")

    # Logits, one slice per time step, packed back into a single tensor.
    output_steps = tf.split(0, self.max_input_seq_length, rnn_output)
    self.logits = tf.pack([
        tf.matmul(tf.squeeze(step, squeeze_dims=[0]), output_w) + output_b
        for step in output_steps
    ])

    # Prediction: first entry of the beam search decoder's decoded list.
    decoded = ctc.ctc_beam_search_decoder(self.logits,
                                          self.input_seq_lengths)
    self.prediction = tf.to_int32(decoded[0][0])

    if not forward_only:
        # Sparse label inputs.
        # int16 would use less memory, but SparseTensor needs int64.
        self.target_indices = tf.placeholder(
            tf.int64, shape=[None, 2], name="target_indices")
        # int8 would use less memory, but CTC needs int32.
        self.target_vals = tf.placeholder(
            tf.int32, shape=[None], name="target_vals")

        sparse_labels = tf.SparseTensor(
            indices=self.target_indices,
            values=self.target_vals,
            shape=[self.batch_size, self.max_target_seq_length])

        # CTC loss and its mean, reported under both summary keys.
        self.ctc_loss = ctc.ctc_loss(self.logits, sparse_labels,
                                     self.input_seq_lengths)
        self.mean_loss = tf.reduce_mean(self.ctc_loss)
        tf.scalar_summary('Mean loss (Training)', self.mean_loss,
                          collections=[training_key])
        tf.scalar_summary('Mean loss (Test)', self.mean_loss,
                          collections=[test_key])

        # Adam update on gradients clipped by global norm.
        trainable = tf.trainable_variables()
        adam = tf.train.AdamOptimizer(self.learning_rate)
        raw_gradients = tf.gradients(self.ctc_loss, trainable)
        clipped_gradients, _ = tf.clip_by_global_norm(raw_gradients,
                                                      grad_clip)
        self.update = adam.apply_gradients(
            zip(clipped_gradients, trainable),
            global_step=self.global_step)

        # Error rate: total edit distance divided by the number of labels.
        # It needs sparse_labels, so it only exists in training graphs.
        with tf.name_scope('Accuracy'):
            total_distance = tf.reduce_sum(
                tf.edit_distance(self.prediction, sparse_labels,
                                 normalize=False))
            label_count = tf.to_float(tf.size(sparse_labels.values))
            error_rate = total_distance / label_count
            tf.scalar_summary('Error Rate (Training)', error_rate,
                              collections=[training_key])
            tf.scalar_summary('Error Rate (Test)', error_rate,
                              collections=[test_key])

    # TensorBoard init
    if self.tensorboard_dir is None:
        self.summary_writer = None
    else:
        self.train_summaries = tf.merge_all_summaries(key=training_key)
        self.test_summaries = tf.merge_all_summaries(key=test_key)
        run_name = (datetime.now().strftime('%Y-%m-%d--%H-%M-%S')
                    if tb_run_name is None else tb_run_name)
        self.summary_writer = tf.train.SummaryWriter(
            tensorboard_dir + '/' + run_name + '/', graph=session.graph)

    # Everything is saved except variables whose name contains
    # 'hidden_state': that state is kept across batches but is not needed
    # across runs, especially when a single file is processed once.
    persisted = [variable for variable in tf.all_variables()
                 if 'hidden_state' not in variable.name]
    self.saver = tf.train.Saver(persisted)
def define_seq2seq_rnn_for_training(image_input_data,
                                    image_input_lengths,
                                    label_rnn_input_data,
                                    dropout_input_keep_prob,
                                    dropout_output_keep_prob):
    """Build the training graph: conv layer -> image LSTM -> label LSTM.

    The image goes through one 7x7, 32-channel tanh convolution and is fed
    column by column to an LSTM.  The output picked by ``last_relevant``
    is projected through a tanh layer into the initial state of a second
    LSTM that reads the label sequence and emits class scores per step.

    Args:
        image_input_data: Image tensor; static sizes of axes 1 and 2 are
            read as width and height (presumably
            (n_batch_size, width, height) - confirm against caller).
        image_input_lengths: Per-example sequence lengths for the image
            RNN.
        label_rnn_input_data: Label tensor, per the original notes
            (n_batch_size, n_label_rnn_steps, n_classes).
        dropout_input_keep_prob: Input keep probability for both LSTMs.
        dropout_output_keep_prob: Output keep probability for both LSTMs.

    Returns:
        A pair ``(step_scores, predicted_indices)``: the python list of
        per-step score tensors and the argmax over classes of the stacked,
        batch-major scores.
    """
    # ---- Convolutional front end (a single layer is active) ----
    width = image_input_data.get_shape()[1].value
    height = image_input_data.get_shape()[2].value
    conv_input = tf.reshape(image_input_data, [-1, width, height, 1])

    patch_size = 7
    conv_channels = 32
    print("Convolutional layer 1, Patch size:", patch_size,
          "Channels:", conv_channels)
    conv_kernel = tf.Variable(
        tf.random_normal([patch_size, patch_size, 1, conv_channels]),
        name="w_conv1")
    conv_bias = tf.Variable(tf.random_normal([conv_channels]),
                            name="b_conv1")
    conv_raw = tf.nn.conv2d(conv_input, conv_kernel,
                            strides=[1, 1, 1, 1], padding='SAME')
    conv_out = tf.tanh(conv_raw + conv_bias)

    # Fold the channels into the feature axis: one RNN step per column.
    image_rnn_inputs = tf.reshape(
        conv_out, [-1, width, height * conv_channels])

    # ---- RNN architecture ----
    image_cell_count = 1
    image_hidden = 96  # hidden layer num of features
    print("Image LSTM cells:", image_cell_count,
          "Image LSTM hidden units:", image_hidden)
    label_cell_count = 1
    label_hidden = 96  # hidden layer num of features
    print("Label LSTM cells:", label_cell_count,
          "Label LSTM hidden units:", label_hidden)

    # ---- Dimensions taken from the inputs ----
    image_batch = tf.shape(image_rnn_inputs)[0]
    image_steps = image_rnn_inputs.get_shape()[1].value  # = image width
    image_features = image_rnn_inputs.get_shape()[2].value
    label_batch = tf.shape(label_rnn_input_data)[0]
    label_steps = label_rnn_input_data.get_shape()[1].value
    n_classes = label_rnn_input_data.get_shape()[2].value
    print(image_steps, image_features)
    print(label_steps, n_classes)

    # ---- Label RNN input/output projections ----
    label_in_w = tf.Variable(
        tf.random_normal([n_classes, label_hidden]), name="w_label_hidden")
    label_in_b = tf.Variable(
        tf.random_normal([label_hidden]), name="b_label_hidden")
    label_out_w = tf.Variable(
        tf.random_normal([label_hidden, n_classes]), name="w_label_out")
    label_out_b = tf.Variable(
        tf.random_normal([n_classes]), name="b_label_out")

    # ---- Image RNN ----
    image_cell = rnn_cell.DropoutWrapper(
        rnn_cell.LSTMCell(image_hidden),
        input_keep_prob=dropout_input_keep_prob,
        output_keep_prob=dropout_output_keep_prob)
    if image_cell_count > 1:
        image_cell = rnn_cell.MultiRNNCell([image_cell] * image_cell_count)
    image_zero_state = image_cell.zero_state(image_batch, tf.float32)
    image_outputs, _ = rnn.dynamic_rnn(
        image_cell, image_rnn_inputs,
        initial_state=image_zero_state,
        sequence_length=image_input_lengths,
        scope="RNN1")
    image_summary = last_relevant(image_outputs, image_input_lengths)

    # ---- Label RNN inputs: step-major, projected, one tensor per step ----
    step_major = tf.transpose(
        label_rnn_input_data, [1, 0, 2])  # (steps, batch, n_classes)
    flat_labels = tf.reshape(
        step_major, [-1, n_classes])  # (steps*batch, n_classes)
    projected = tf.matmul(
        flat_labels, label_in_w) + label_in_b  # (steps*batch, hidden)
    label_step_inputs = tf.split(0, label_steps, projected)

    # ---- Label RNN ----
    label_cell = rnn_cell.DropoutWrapper(
        rnn_cell.LSTMCell(label_hidden, forget_bias=0),
        input_keep_prob=dropout_input_keep_prob,
        output_keep_prob=dropout_output_keep_prob)
    if label_cell_count > 1:
        label_cell = rnn_cell.MultiRNNCell([label_cell] * label_cell_count)

    # The zero state is only consulted for its static width; the state
    # actually used is a tanh projection of the image RNN summary.
    label_zero_state = label_cell.zero_state(label_batch, tf.float32)
    state_width = label_zero_state.get_shape()[1].value
    bridge_w = tf.Variable(tf.random_normal(
        [image_summary.get_shape()[1].value, state_width]))
    bridge_b = tf.Variable(tf.random_normal([state_width]))
    label_initial_state = tf.tanh(
        tf.matmul(image_summary, bridge_w) + bridge_b)

    label_outputs, _ = rnn.rnn(
        label_cell, label_step_inputs,
        initial_state=label_initial_state,
        scope="RNN2")

    # Per-step class scores: label_steps tensors of (batch, n_classes).
    step_scores = [tf.matmul(step_output, label_out_w) + label_out_b
                   for step_output in label_outputs]

    stacked = tf.pack(step_scores)  # (steps, batch, n_classes)
    batch_major = tf.transpose(stacked, [1, 0, 2])  # (batch, steps, classes)
    predicted_indices = tf.argmax(batch_major, 2)  # (batch, steps)
    return step_scores, predicted_indices
def __init__(self, num_labels, num_layers, hidden_size, dropout,
             batch_size, learning_rate, lr_decay_factor, grad_clip,
             max_input_seq_length, max_target_seq_length, input_dim,
             forward_only=False):
    '''
    Acoustic rnn model, using ctc loss with lstm cells
    Inputs:
      num_labels - dimension of character input/one hot encoding
                   (width of the output layer)
      num_layers - number of lstm layers
      hidden_size - size of hidden layers
      dropout - value passed unchanged as both input_keep_prob and
                output_keep_prob of the DropoutWrapper, i.e. it is used
                as a keep probability
      batch_size - number of training examples fed at once
      learning_rate - learning rate parameter fed to optimizer
      lr_decay_factor - factor the learning rate is multiplied by each
                        time learning_rate_decay_op is run
      grad_clip - max gradient size (prevent exploding gradients), used
                  as the global-norm clipping threshold
      max_input_seq_length - maximum length of input vector sequence
      max_target_seq_length - maximum length of the target sequence
      input_dim - dimension of input vector
      forward_only - whether to build back prop nodes or not
    '''
    self.dropout = dropout
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * lr_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
    self.dropout_keep_prob_lstm_input = tf.constant(self.dropout)
    self.dropout_keep_prob_lstm_output = tf.constant(self.dropout)
    self.max_input_seq_length = max_input_seq_length
    self.max_target_seq_length = max_target_seq_length

    #graph inputs (time-major: [max_input_seq_length, batch, input_dim])
    self.inputs = tf.placeholder(
        tf.float32,
        shape=[self.max_input_seq_length, None, input_dim],
        name="inputs")
    self.input_seq_lengths = tf.placeholder(tf.int32,
                                            shape=[None],
                                            name="input_seq_lengths")
    self.target_seq_lengths = tf.placeholder(tf.int32,
                                             shape=[None],
                                             name="target_seq_lengths")

    #graph sparse tensor inputs
    self.target_indices = tf.placeholder(tf.int64,
                                         shape=[None, 2],
                                         name="target_indices")
    self.target_vals = tf.placeholder(tf.int32,
                                      shape=[None],
                                      name="target_vals")

    #define cells of acoustic model
    # NOTE(review): the dropout wrapper is added even when forward_only is
    # set - confirm callers pass a keep probability of 1.0 for inference.
    cell = rnn_cell.DropoutWrapper(
        rnn_cell.BasicLSTMCell(hidden_size),
        input_keep_prob=self.dropout_keep_prob_lstm_input,
        output_keep_prob=self.dropout_keep_prob_lstm_output)
    if num_layers > 1:
        cell = rnn_cell.MultiRNNCell([cell] * num_layers)

    #build input layer
    w_i = tf.get_variable("input_w", [input_dim, hidden_size])
    b_i = tf.get_variable("input_b", [hidden_size])

    #make rnn inputs
    # Only the split time axis (axis 0, size 1) is squeezed. A bare
    # tf.squeeze would also drop the batch or feature axis whenever that
    # axis has size 1, leaving xw_plus_b with a non-matrix input.
    inputs = [
        tf.nn.xw_plus_b(tf.squeeze(i, squeeze_dims=[0]), w_i, b_i)
        for i in tf.split(0, self.max_input_seq_length, self.inputs)
    ]

    #set rnn init state to 0s
    initial_state = cell.zero_state(self.batch_size, tf.float32)

    #build rnn
    rnn_output, self.hidden_state = rnn.dynamic_rnn(
        cell,
        tf.pack(inputs),
        sequence_length=self.input_seq_lengths,
        initial_state=initial_state,
        time_major=True,
        parallel_iterations=100)

    #build output layer
    w_o = tf.get_variable("output_w", [hidden_size, num_labels])
    b_o = tf.get_variable("output_b", [num_labels])

    #compute logits (a python list with one tensor per time step)
    self.logits = [
        tf.nn.xw_plus_b(tf.squeeze(i, squeeze_dims=[0]), w_o, b_o)
        for i in tf.split(0, self.max_input_seq_length, rnn_output)
    ]

    #setup sparse tensor for input into ctc loss
    sparse_labels = tf.SparseTensor(
        indices=self.target_indices,
        values=self.target_vals,
        shape=[self.batch_size, self.max_target_seq_length])

    #compute ctc loss
    self.ctc_loss = ctc.ctc_loss(tf.pack(self.logits), sparse_labels,
                                 self.input_seq_lengths)
    self.mean_loss = tf.reduce_mean(self.ctc_loss)

    if not forward_only:
        # Back prop nodes: plain SGD with global-norm gradient clipping.
        params = tf.trainable_variables()
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.ctc_loss, params)
        clipped_gradients, _ = tf.clip_by_global_norm(
            gradients, grad_clip)
        self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                          global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())
def define_seq2seq_rnn_for_training(image_rnn_input_data, image_rnn_input_lengths,
                                    label_rnn_input_data, dropout_input_keep_prob,
                                    dropout_output_keep_prob):
    '''
    Build the training graph of an image-to-label-sequence model: an LSTM
    reads the image columns, and its last relevant output is projected
    into the initial state of a second LSTM that reads the label inputs.

    Inputs:
    image_rnn_input_data - (n_batch_size, n_steps, n_features)
    image_rnn_input_lengths - per-example sequence lengths for the image RNN
    label_rnn_input_data - (n_batch_size, n_label_rnn_steps, n_classes)
    dropout_input_keep_prob - input keep probability for both DropoutWrappers
    dropout_output_keep_prob - output keep probability for both DropoutWrappers

    Returns:
    label_rnn_outputs - list with one (n_batch_size, n_classes) tensor of
                        unnormalised scores per label step
    label_rnn_predicted_index_labels - (n_batch_size, n_label_rnn_steps)
                                       argmax over the class axis
    '''
    # Define RNN architecture
    n_image_rnn_cells = 1
    n_image_rnn_hidden = 96  # hidden layer num of features
    print("Image LSTM cells:", n_image_rnn_cells, "Image LSTM hidden units:", n_image_rnn_hidden)
    n_label_rnn_cells = 1
    n_label_rnn_hidden = 96  # hidden layer num of features
    print("Label LSTM cells:", n_label_rnn_cells, "Label LSTM hidden units:", n_label_rnn_hidden)

    # Retrieve dimensions from input data
    image_batch_size = tf.shape(image_rnn_input_data)[0]
    n_image_rnn_steps = image_rnn_input_data.get_shape()[1].value  # Timesteps = image width
    n_image_features = image_rnn_input_data.get_shape()[2].value
    label_batch_size = tf.shape(label_rnn_input_data)[0]
    n_label_rnn_steps = label_rnn_input_data.get_shape()[1].value
    n_classes = label_rnn_input_data.get_shape()[2].value
    print(n_image_rnn_steps, n_image_features)
    print(n_label_rnn_steps, n_classes)

    # Define weights
    # NOTE(review): w_image_hidden / b_image_hidden are not read by the
    # active code path (only a disabled input projection used them). They
    # are still created so the set of graph variables is unchanged --
    # confirm before removing.
    w_image_hidden = tf.Variable(tf.random_normal([n_image_features, n_image_rnn_hidden]))
    b_image_hidden = tf.Variable(tf.random_normal([n_image_rnn_hidden]))
    w_label_hidden = tf.Variable(tf.random_normal([n_classes, n_label_rnn_hidden]))
    b_label_hidden = tf.Variable(tf.random_normal([n_label_rnn_hidden]))
    w_label_out = tf.Variable(tf.random_normal([n_label_rnn_hidden, n_classes]))
    b_label_out = tf.Variable(tf.random_normal([n_classes]))

    # The image RNN consumes the raw batch-major input directly; no input
    # projection is applied.
    image_rnn_inputs = image_rnn_input_data

    # Image RNN
    image_lstm_cell = rnn_cell.LSTMCell(n_image_rnn_hidden)
    image_lstm_cell = rnn_cell.DropoutWrapper(image_lstm_cell,
                                              input_keep_prob=dropout_input_keep_prob,
                                              output_keep_prob=dropout_output_keep_prob)
    if n_image_rnn_cells > 1:
        image_lstm_cell = rnn_cell.MultiRNNCell([image_lstm_cell] * n_image_rnn_cells)
    image_rnn_initial_state = image_lstm_cell.zero_state(image_batch_size, tf.float32)
    image_rnn_outputs, _ = rnn.dynamic_rnn(image_lstm_cell,
                                           image_rnn_inputs,
                                           initial_state=image_rnn_initial_state,
                                           sequence_length=image_rnn_input_lengths,
                                           scope="RNN1")

    # Pick, per example, the output at its own sequence length rather than
    # at the last (possibly padded) time step.
    image_rnn_output = last_relevant(image_rnn_outputs, image_rnn_input_lengths)

    # Transform input data for label RNN
    label_rnn_inputs = tf.transpose(label_rnn_input_data, [1, 0, 2])  # (n_output_steps,n_batch_size,n_classes)
    label_rnn_inputs = tf.reshape(label_rnn_inputs, [-1, n_classes])  # (n_steps*n_batch_size, n_classes)
    label_rnn_inputs = tf.matmul(label_rnn_inputs, w_label_hidden) + b_label_hidden  # (n_steps*n_batch_size, n_hidden)
    label_rnn_inputs = tf.split(0, n_label_rnn_steps, label_rnn_inputs)  # n_steps * (n_batch_size, n_hidden)

    # Label RNN
    label_lstm_cell = rnn_cell.LSTMCell(n_label_rnn_hidden, forget_bias=0)
    label_lstm_cell = rnn_cell.DropoutWrapper(label_lstm_cell,
                                              input_keep_prob=dropout_input_keep_prob,
                                              output_keep_prob=dropout_output_keep_prob)
    if n_label_rnn_cells > 1:
        label_lstm_cell = rnn_cell.MultiRNNCell([label_lstm_cell] * n_label_rnn_cells)

    # The zero state is built only to read the static width of the label
    # cell state. The actual initial state is a learned tanh projection of
    # the image RNN output onto that width.
    label_rnn_zero_state = label_lstm_cell.zero_state(label_batch_size, tf.float32)
    label_state_size = label_rnn_zero_state.get_shape()[1].value
    w_image2label = tf.Variable(
        tf.random_normal([image_rnn_output.get_shape()[1].value, label_state_size]))
    b_image2label = tf.Variable(tf.random_normal([label_state_size]))
    label_rnn_initial_state = tf.tanh(tf.matmul(image_rnn_output, w_image2label) + b_image2label)

    label_rnn_outputs, _ = rnn.rnn(label_lstm_cell,
                                   label_rnn_inputs,
                                   initial_state=label_rnn_initial_state,
                                   scope="RNN2")

    label_rnn_outputs = [tf.matmul(lro, w_label_out) + b_label_out
                         for lro in label_rnn_outputs]  # n_label_rnn_steps * (n_batch_size,n_classes)

    label_rnn_predicted_index_labels = tf.pack(label_rnn_outputs)  # (n_label_rnn_steps,n_batch_size,n_classes)
    label_rnn_predicted_index_labels = tf.transpose(label_rnn_predicted_index_labels,
                                                    [1, 0, 2])  # (n_batch_size,n_label_rnn_steps,n_classes)
    label_rnn_predicted_index_labels = tf.argmax(label_rnn_predicted_index_labels,
                                                 2)  # (n_batch_size, n_label_rnn_steps)

    return label_rnn_outputs, label_rnn_predicted_index_labels
def build_model(self):
    '''
    Build the sense-classification graph: a bidirectional LSTM encoder over
    arg1, an attention-fed LSTM decoder over arg2, and a linear classifier
    on the length-averaged decoder and encoder outputs.

    Creates the placeholders (sense, arg1, arg2, arg1_len, arg2_len,
    keep_prob, mask) and the attributes output, accuracy, loss, optimizer
    and train_op on self.
    '''
    with tf.variable_scope('RNNTEST'):
        self.sense = tf.placeholder(tf.int32, [None])
        # The last axis holds 4 id channels per token, in the order
        # NER, lemma, word, POS (see the enumerate below).
        self.arg1 = tf.placeholder(tf.int32, [None, None, 4])
        self.arg2 = tf.placeholder(tf.int32, [None, None, 4])
        self.arg1_len = tf.placeholder(tf.int32, [None])
        self.arg2_len = tf.placeholder(tf.int32, [None])
        self.keep_prob = tf.placeholder(tf.float32)
        arg1_list = tf.split(2, 4, self.arg1)
        arg2_list = tf.split(2, 4, self.arg2)

        # NOTE(review): confirm which statements belong under this device
        # block. Placement does not change the computed values.
        with tf.device('/cpu:0'):
            # A table is None when its embedding size is 0, which drops
            # that channel from the input.
            NER_W = tf.get_variable('NER_embed', [self.data_loader.NER_vocab_size, self.NER_embed_size]) if self.NER_embed_size > 0 else None
            lemma_W = tf.get_variable('lemma_embed', [self.data_loader.lemma_vocab_size, self.lemma_embed_size]) if self.lemma_embed_size > 0 else None
            if self.use_pre_trained_embedding:
                word_W = tf.get_variable('word_embed', initializer=tf.convert_to_tensor(self.data_loader.pre_trained_word_embeddings, dtype=tf.float32)) if self.word_embed_size > 0 else None
            else:
                word_W = tf.get_variable('word_embed', shape=[self.data_loader.word_vocab_size, self.word_embed_size]) if self.word_embed_size > 0 else None
            POS_W = tf.get_variable('POS_embed', [self.data_loader.POS_vocab_size, self.POS_embed_size]) if self.POS_embed_size > 0 else None

            arg1_embed_list = []
            arg2_embed_list = []
            for idx, W in enumerate([NER_W, lemma_W, word_W, POS_W]):
                if W is not None:
                    arg1_embed_list.append(tf.nn.embedding_lookup(W, tf.squeeze(arg1_list[idx], [2])))
                    arg2_embed_list.append(tf.nn.embedding_lookup(W, tf.squeeze(arg2_list[idx], [2])))

        arg1 = tf.nn.dropout(tf.concat(2, arg1_embed_list), self.keep_prob)
        arg2 = tf.nn.dropout(tf.concat(2, arg2_embed_list), self.keep_prob)

        encoder_lstm_unit = rnn_cell.BasicLSTMCell(self.encoder_size)
        decoder_lstm_unit = rnn_cell.BasicLSTMCell(self.decoder_size)

        with tf.variable_scope('forward_encoder'):
            forward_encoder_outputs, forward_encoder_state = rnn.dynamic_rnn(
                encoder_lstm_unit, arg1, self.arg1_len, dtype=tf.float32)
        with tf.variable_scope('backward_encoder'):
            # Pass the lengths here as well. Without them the backward pass
            # keeps running over the padding that follows each reversed
            # sequence, which corrupts its final state and leaves non-zero
            # outputs at padded positions (these are summed into `judge`).
            backward_encoder_outputs, backward_encoder_state = rnn.dynamic_rnn(
                encoder_lstm_unit,
                tf.reverse_sequence(arg1, tf.cast(self.arg1_len, tf.int64), 1),
                self.arg1_len,
                dtype=tf.float32)

        # Re-reverse the backward outputs so both directions are aligned
        # per token before concatenating along the feature axis.
        encoder_outputs = tf.concat(2, [
            forward_encoder_outputs,
            tf.reverse_sequence(backward_encoder_outputs, tf.cast(self.arg1_len, tf.int64), 1)])
        encoder_state = tf.concat(1, [forward_encoder_state, backward_encoder_state])

        source = tf.expand_dims(encoder_outputs, 2)  #batch_size x source_len x 1 x source_depth(2*encoder_size)
        attention_W = tf.get_variable('attention_W', [1, 1, 2 * self.encoder_size, self.attention_judge_size])
        attention_V = tf.get_variable('attention_V', [self.attention_judge_size])
        # The 1x1 convolution applies attention_W to every source position at once.
        WxH = tf.nn.conv2d(source, attention_W, [1, 1, 1, 1], 'SAME')  #batch_size x source_len x 1 x attention
        self.mask = tf.placeholder(tf.float32, [None, None])

        def attention(input_t, output_t_minus_1, time):
            # Loop function handed to dynamic_rnn_decoder: mixes the current
            # decoder input with an attention summary of the encoder
            # outputs. `time` is accepted but not used.
            with tf.variable_scope('attention'):
                VxS = tf.reshape(
                    rnn_cell.linear(output_t_minus_1, self.attention_judge_size, True),
                    [-1, 1, 1, self.attention_judge_size])  #batch_size x 1 x 1 x attention
                _exp = tf.exp(tf.reduce_sum(attention_V * tf.tanh(WxH + VxS), [3]))  #batch_size x source_len x 1
                # Multiplying by the mask removes the positions it zeroes
                # from the normalisation below.
                _exp = _exp * tf.expand_dims(self.mask, -1)
                attention_weight = _exp / tf.reduce_sum(_exp, [1], keep_dims=True)
                attention_t = tf.reduce_sum(encoder_outputs * attention_weight, [1])
            # NOTE(review): kept outside the 'attention' scope so this
            # second linear layer does not share a variable scope with the
            # one above -- confirm against the original layout.
            feed_in_t = tf.tanh(rnn_cell.linear([attention_t, input_t], self.embedding_size, True))
            return feed_in_t

        with tf.variable_scope('decoder'):
            decoder_outputs, decoder_state = dynamic_rnn_decoder(
                arg2,
                decoder_lstm_unit,
                initial_state=encoder_state,
                sequence_length=self.arg2_len,
                loop_function=attention)

        # Length-normalised sums (i.e. means over time) of decoder and
        # encoder outputs, concatenated as the classifier input.
        judge = tf.concat(1, [
            tf.reduce_sum(decoder_outputs, [1]) / tf.expand_dims(tf.cast(self.arg2_len, tf.float32), -1),
            tf.reduce_sum(encoder_outputs, [1]) / tf.expand_dims(tf.cast(self.arg1_len, tf.float32), -1)])
        unscaled_log_distribution = rnn_cell.linear(judge, self.data_loader.sense_vocab_size, True)
        self.output = tf.cast(tf.argmax(unscaled_log_distribution, 1), tf.int32)
        self.accuracy = tf.reduce_mean(tf.cast(tf.equal(self.output, self.sense), tf.float32))

        #maximum likelihood method
        # (a max-margin loss was tried here previously and is disabled)
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(unscaled_log_distribution, self.sense))
        self.optimizer = tf.train.AdagradOptimizer(self.lr)
        self.train_op = self.optimizer.minimize(self.loss)