def pad(text, start_id=None, end_id=None):
  print('Pad with start_id', start_id, ' end_id', end_id)

  need_start_mark = start_id is not None
  need_end_mark = end_id is not None

  if not need_start_mark and not need_end_mark:
    return text, melt.length(text)

  batch_size = tf.shape(text)[0]
  zero_pad = tf.zeros([batch_size, 1], dtype=text.dtype)

  sequence_length = melt.length(text)

  if not need_start_mark:
    text = tf.concat([text, zero_pad], 1)
  else:
    if need_start_mark:
      start_pad = zero_pad + start_id
      if need_end_mark:
        text = tf.concat([start_pad, text, zero_pad], 1)
      else:
        text = tf.concat([start_pad, text], 1)
      sequence_length += 1

  if need_end_mark:
    text = melt.dynamic_append_with_length(
        text,
        sequence_length,
        tf.constant(end_id, dtype=text.dtype))
    sequence_length += 1

  return text, sequence_length
def pad(text, start_id=None, end_id=None, weights=None, end_weight=1.0):
  logging.info('Pad with start_id %s end_id %s', start_id, end_id)

  need_start_mark = start_id is not None
  need_end_mark = end_id is not None

  if not need_start_mark and not need_end_mark:
    return text, melt.length(text), weights

  batch_size = tf.shape(text)[0]
  zero_pad = tf.zeros([batch_size, 1], dtype=text.dtype)

  sequence_length = melt.length(text)

  if not need_start_mark:
    text = tf.concat([text, zero_pad], 1)
    if weights is not None:
      weights = tf.concat(
          [weights, tf.ones_like(zero_pad, dtype=tf.float32) * end_weight], 1)
  else:
    if need_start_mark:
      start_pad = zero_pad + start_id
      if need_end_mark:
        text = tf.concat([start_pad, text, zero_pad], 1)
        if weights is not None:
          weights = tf.concat(
              [tf.zeros_like(start_pad, dtype=tf.float32),
               weights,
               tf.ones_like(zero_pad, dtype=tf.float32) * end_weight], 1)
      else:
        text = tf.concat([start_pad, text], 1)
        if weights is not None:
          weights = tf.concat(
              [tf.zeros_like(start_pad, dtype=tf.float32), weights], 1)
      sequence_length += 1

  if need_end_mark:
    text = melt.dynamic_append_with_length(
        text,
        sequence_length,
        tf.constant(end_id, dtype=text.dtype))
    if weights is not None:
      weights = melt.dynamic_append_with_length_float32(
          weights,
          sequence_length,
          tf.constant(end_weight, dtype=weights.dtype))
    sequence_length += 1

  return text, sequence_length, weights
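# A minimal NumPy sketch of what pad is meant to produce, for illustration only.
# It assumes melt.length counts the non-zero ids per row and
# melt.dynamic_append_with_length writes a value at index sequence_length of each
# row; both are assumptions about the melt helpers, not their actual code.
import numpy as np

def pad_sketch(text, start_id=None, end_id=None):
  text = np.array(text)
  length = (text != 0).sum(axis=1)                 # assumed melt.length semantics
  if start_id is not None:
    text = np.concatenate([np.full((len(text), 1), start_id), text], axis=1)
    length += 1
  if end_id is not None:
    text = np.concatenate([text, np.zeros((len(text), 1), dtype=text.dtype)], axis=1)
    for i, l in enumerate(length):                 # assumed dynamic_append semantics
      text[i, l] = end_id
    length += 1
  return text, length

# pad_sketch([[4, 5, 0], [7, 8, 9]], start_id=1, end_id=2)
# -> text [[1, 4, 5, 2, 0], [1, 7, 8, 9, 2]], length [4, 5]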
def gen_text_feature(self, text):
  is_training = self.is_training
  batch_size = tf.shape(text)[0]

  #add a zero column before and after the sentence, then append the end mark
  #(same padding scheme as compute_seq_loss below)
  zero_pad = tf.zeros([batch_size, 1], dtype=text.dtype)
  text = tf.concat(1, [zero_pad, text, zero_pad])
  #+1 for the leading zero
  sequence_length = melt.length(text) + 1
  text = melt.dynamic_append_with_length(
      text,
      sequence_length,
      tf.constant(self.end_id, dtype=text.dtype))
  sequence_length += 1

  state = self.cell.zero_state(batch_size, tf.float32)

  inputs = tf.nn.embedding_lookup(self.emb, text)
  if is_training and FLAGS.keep_prob < 1:
    inputs = tf.nn.dropout(inputs, FLAGS.keep_prob)

  outputs, state = tf.nn.dynamic_rnn(self.cell, inputs,
                                     initial_state=state,
                                     sequence_length=sequence_length)
  text_feature = melt.dynamic_last_relevant(outputs, sequence_length)
  return text_feature
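# melt.dynamic_last_relevant is not shown here; a common way to get "the RNN output
# at the last valid time step of each sequence" is the flatten-and-gather trick below.
# This is a sketch of the presumed behavior, not the melt implementation itself.
def last_relevant_sketch(outputs, sequence_length):
  # outputs: [batch_size, max_steps, dim], sequence_length: [batch_size]
  sequence_length = tf.cast(sequence_length, tf.int32)
  batch_size = tf.shape(outputs)[0]
  max_steps = tf.shape(outputs)[1]
  dim = tf.shape(outputs)[2]
  flat = tf.reshape(outputs, [-1, dim])            # [batch_size * max_steps, dim]
  index = tf.range(batch_size) * max_steps + (sequence_length - 1)
  return tf.gather(flat, index)                    # [batch_size, dim]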
def compute_seq_loss(self, image_emb, text):
  """ same as 7 but uses dynamic rnn """
  #notice: here we must use tf.shape, not text.get_shape()[0], because the shape is dynamic and only known at runtime
  is_training = self.is_training
  batch_size = tf.shape(text)[0]

  zero_pad = tf.zeros([batch_size, 1], dtype=text.dtype)
  #add a zero before the sentence to avoid always generating A...
  #add a zero after the sentence to make sure the end mark does not exceed the boundary in case the input sentence is long without 0 padding at the end
  text = tf.concat(1, [zero_pad, text, zero_pad])

  #+1 for the first zero
  sequence_length = melt.length(text) + 1
  text = melt.dynamic_append_with_length(
      text,
      sequence_length,
      tf.constant(self.end_id, dtype=text.dtype))
  sequence_length += 1

  #@TODO different init state as shown in ptb_word_lm
  state = self.cell.zero_state(batch_size, tf.float32)
  self.initial_state = state

  #print('melt.last_dimension(text)', melt.last_dimension(text))

  #[batch_size, num_steps - 1, emb_dim], remove last col
  #notice tf 0.10 does not support text[:,:-1]  @TODO may change to that if tf supports it in the future
  #the hack is to use last_dimension, which internally uses the static shape; a dynamic shape like tf.shape does not work!
  #last_dimension is static: known at graph construction time, not at runtime
  #inputs = tf.nn.embedding_lookup(self.emb, text[:,:melt.last_dimension(text) - 1]) + self.bemb
  # TypeError("Using a `tf.Tensor` as a Python `bool` is not allowed. "
  #inputs = tf.nn.embedding_lookup(self.emb, text[:,:tf.shape(text)[1] - 1]) + self.bemb
  #see ipynotebook/dynamic_length.npy
  #well this works..
  #num_steps = tf.shape(text)[1]
  #inputs = tf.nn.embedding_lookup(self.emb, melt.exclude_last_col(text)) + self.bemb
  inputs = tf.nn.embedding_lookup(
      self.emb, melt.dynamic_exclude_last_col(text)) + self.bemb

  if is_training and FLAGS.keep_prob < 1:
    inputs = tf.nn.dropout(inputs, FLAGS.keep_prob)

  #inputs is [batch_size, num_steps, emb_dim]; image_emb is expanded
  #([batch_size, emb_dim] -> [batch_size, 1, emb_dim]) before concat
  inputs = tf.concat(1, [tf.expand_dims(image_emb, 1), inputs])

  outputs, state = tf.nn.dynamic_rnn(self.cell, inputs,
                                     initial_state=state,
                                     sequence_length=sequence_length)
  self.final_state = state

  #@TODO now it looks like this version is much faster than using something like _compute_seq_loss13
  #but there are still many unnecessary calculations, like the matmul over all batch_size * num_steps positions ..
  #can we speed up by not calculating the loss where mask[pos] == 0 ?
  output = tf.reshape(outputs, [-1, self.emb_dim])
  with tf.device('/cpu:0'):
    logits = tf.matmul(output, self.embed_word_W) + self.embed_word_b \
        if self.softmax_loss_function is None else output

  targets = text
  mask = tf.cast(tf.sign(text), dtype=tf.float32)

  loss = tf.nn.seq2seq.sequence_loss_by_example(
      [logits],
      [tf.reshape(targets, [-1])],
      [tf.reshape(mask, [-1])],
      softmax_loss_function=self.softmax_loss_function)

  #--------@TODO it seems using the branch below with tf.reduce_mean does not work as well as the loss above with melt.reduce_mean
  #--if there is no bug, the difference should be per-example loss vs per-single-step loss
  if (not is_training) or FLAGS.per_example_loss:
    loss = melt.reduce_mean_with_mask(
        tf.reshape(loss, [batch_size, -1]),
        mask,
        reduction_indices=1,
        keep_dims=True)
  else:
    #if we use this, the loss will be [batch_size * num_steps, 1], so when using negs we cannot use dynamic length mode
    loss = tf.reshape(loss, [-1, 1])

  return loss
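# For readers without the old tf.nn.seq2seq module: the sequence_loss_by_example call
# above computes a per-step cross entropy weighted by the 0/1 mask. The sketch below
# shows the same idea against the current tf.nn API; the function name and argument
# layout are illustrative assumptions, not the code this repo actually runs.
def masked_seq_loss_sketch(logits, targets, mask, per_example=True):
  # logits:  [batch_size * num_steps, vocab_size]
  # targets: [batch_size, num_steps] int ids (0 = padding)
  # mask:    [batch_size, num_steps] float, 1.0 for real tokens, 0.0 for padding
  step_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=tf.reshape(targets, [-1]), logits=logits)
  step_loss = step_loss * tf.reshape(mask, [-1])   # zero out padded positions
  if per_example:
    # average over the real steps of each example -> [batch_size, 1]
    step_loss = tf.reshape(step_loss, tf.shape(mask))
    per_ex = tf.reduce_sum(step_loss, 1) / (tf.reduce_sum(mask, 1) + 1e-12)
    return tf.expand_dims(per_ex, 1)
  # otherwise keep one loss value per step -> [batch_size * num_steps, 1]
  return tf.reshape(step_loss, [-1, 1])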