def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    # Uniform weights: every predicted token contributes equally to the loss.
    loss_weights = [tf.ones([self.config.batch_size * self.config.num_steps])]
    # Targets must be a 1-D tensor of word ids, i.e. shape (batch_size * num_steps,).
    reshaped_labels = tf.reshape(self.labels_placeholder, [-1])
    cross_entropy = sequence_loss([output], [reshaped_labels], loss_weights,
                                  len(self.vocab))
    # Add the cross-entropy (loss between predictions and labels) to a collection.
    tf.add_to_collection('total_loss', cross_entropy)
    # tf.get_collection(name) returns the values stored in the named collection;
    # tf.add_n sums them into the final scalar loss.
    loss = tf.add_n(tf.get_collection('total_loss'))
    ### END YOUR CODE
    return loss

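# Most of the add_loss_op snippets in this file call the legacy, list-based
# sequence_loss from tensorflow.python.ops.seq2seq (TF 0.x-era API). Below is a
# minimal, self-contained sketch of that call; batch_size, num_steps and
# vocab_size are illustrative values, not taken from any snippet.
import tensorflow as tf
from tensorflow.python.ops.seq2seq import sequence_loss

batch_size, num_steps, vocab_size = 4, 3, 10
logits = tf.placeholder(tf.float32, [batch_size * num_steps, vocab_size])  # flattened logits
labels = tf.placeholder(tf.int32, [batch_size, num_steps])                 # integer word ids
weights = [tf.ones([batch_size * num_steps])]                              # uniform per-token weights
# logits/targets/weights are lists of per-step tensors; here the whole
# (batch, steps) block is flattened into a single "step".
loss = sequence_loss([logits], [tf.reshape(labels, [-1])], weights)        # 0-d average cross-entropy
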
def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    # `output` holds the logits; weight every position equally.
    all_ones_weights = [tf.ones([self.config.batch_size * self.config.num_steps])]
    loss = sequence_loss([output],
                         [tf.reshape(self.labels_placeholder, [-1])],
                         all_ones_weights)  # , len(self.vocab)
    # Equivalent alternative: register the cross-entropy in a 'total_loss'
    # collection and sum the collection with tf.add_n.
    ### END YOUR CODE
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.
    Check https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py

    Args:
        output: A tensor of shape (None, self.vocab) (LIBIN: not used)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    # output shape  : [num_steps * (batch_size, len(self.vocab))]
    # targets shape : [num_steps * (batch_size, )]
    # weights shape : [num_steps * (batch_size, )]
    targets = [tf.squeeze(ts, [1])
               for ts in tf.split(1, self.config.num_steps, self.labels_placeholder)]
    weights = [tf.ones((self.config.batch_size, ))
               for step in xrange(self.config.num_steps)]
    loss = sequence_loss(output, targets, weights)
    ### END YOUR CODE
    return loss

def build_loss(self, out, out_tensor):
    """Build a loss function and accuracy for the model."""
    print(' Building loss and accuracy')

    with tf.variable_scope('accuracy'):
        argmax = tf.to_int32(tf.argmax(out_tensor, 2))
        correct = tf.to_float(tf.equal(argmax, self.ts)) * self.t_mask
        accuracy = tf.reduce_sum(correct) / tf.reduce_sum(self.t_mask)

    with tf.variable_scope('loss'):
        with tf.variable_scope('split_t_and_mask'):
            split_kwargs = {'split_dim': 1, 'num_split': self.max_t_seq_len}
            ts = tf.split(value=self.ts, **split_kwargs)
            t_mask = tf.split(value=self.t_mask, **split_kwargs)
            t_mask = [tf.squeeze(weight) for weight in t_mask]

        loss = seq2seq.sequence_loss(out, ts, t_mask, self.max_t_seq_len)

        with tf.variable_scope('regularization'):
            regularize = tf.contrib.layers.l2_regularizer(self.reg_scale)
            params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            reg_term = sum([regularize(param) for param in params])

        loss += reg_term

    return loss, accuracy

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    # logits = output
    # targets = self.labels_placeholder
    # weights = tf.ones((self.config.batch_size * self.config.num_steps))
    # loss = sequence_loss(logits, tf.reshape(targets, [-1]), weights)
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    cross_entropy = sequence_loss([output],
                                  [tf.reshape(self.labels_placeholder, [-1])],
                                  all_ones, len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    ### END YOUR CODE
    return loss

def add_loss_op(self, outputs):
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    cross_entropy = sequence_loss([outputs],
                                  [tf.reshape(self.label_placeholder, [-1])],
                                  all_ones, len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    return loss

def build_xent_loss(self, scores):
    batch_size = self.opts.batch_size
    sequence_length = self.opts.sequence_length
    vocab_dim = self.dataset.vocab_dim

    scores = tf.reshape(scores, (batch_size * sequence_length, vocab_dim))
    logits = [scores]
    targets = [tf.reshape(self.xent_targets_placeholder, [-1])]
    weights = [tf.ones((batch_size * sequence_length,))]
    loss = sequence_loss(logits, targets, weights)
    return loss

def add_loss_op(self, output):
    # (batch_size, num_steps)
    all_ones = [tf.ones([self.config.num_steps * self.config.batch_size])]
    # Cross-entropy over the whole sequence, i.e. the loss across all num_steps
    # positions. See the sequence_loss source for the meaning of each argument;
    # it returns the average log-perplexity, i.e. the average cross-entropy.
    cross_entropy = sequence_loss(
        [output], [tf.reshape(self.labels_placeholder, [-1])], all_ones)
    # Collect all losses. For L2 regularization, the RNN_I, RNN_H and U weight
    # matrices could also be added to the 'total_loss' collection.
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    return loss

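# Hedged sketch of the L2-regularization idea mentioned in the comment above:
# any weight matrices to be penalized (the names 'RNN_I', 'RNN_H' and 'U' stand
# in for whatever the variable scope actually calls them) can simply be added
# to the same 'total_loss' collection before tf.add_n sums it.
l2_strength = 1e-4  # illustrative value
for var in tf.trainable_variables():
    if any(name in var.name for name in ('RNN_I', 'RNN_H', 'U')):
        tf.add_to_collection('total_loss', l2_strength * tf.nn.l2_loss(var))
loss = tf.add_n(tf.get_collection('total_loss'))
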
def create_loss(self):
    start_time = time.time()
    self.losses = []
    logits = self.decoder_states
    targets = self.tokens[1:]
    weights = self.tokens_weights[1:]
    log_perps = seq2seq.sequence_loss(logits, targets, weights, self.vocab_size)
    self.losses.append(log_perps)
    print('create_loss graph time %f' % (time.time() - start_time))

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    # `output` holds the logits; weight every position equally.
    all_ones_weights = [tf.ones([self.config.batch_size * self.config.num_steps])]
    loss = sequence_loss([output],
                         [tf.reshape(self.labels_placeholder, [-1])],
                         all_ones_weights)  # , len(self.vocab)
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    # Targets must be a 1-D tensor of word ids, and the weights a 1-D tensor of
    # the same length; otherwise sequence_loss cannot average correctly.
    return sequence_loss(
        [output],
        [tf.reshape(self.labels_placeholder, [-1])],
        [tf.ones([self.config.batch_size * self.config.num_steps])])

def get_loss(self, seq_length):
    if not self.outputs.has_key(seq_length):
        self.get_outputs(seq_length)

    if not self.losses.has_key(seq_length):
        loss = sequence_loss(logits=self.outputs[seq_length],
                             targets=self.true_outputs[0:seq_length],
                             weights=[1] * seq_length,
                             average_across_timesteps=False,
                             average_across_batch=False,
                             softmax_loss_function=binary_cross_entropy_with_logits)
        self.losses[seq_length] = loss
    return self.losses[seq_length]

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    weights = tf.ones([self.config.batch_size * self.config.num_steps])
    loss = sequence_loss([output],
                         [tf.reshape(self.labels_placeholder, [-1])],
                         [weights])
    ### END YOUR CODE
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    flattened_labels = tf.reshape(self.labels_placeholder, [-1])
    loss = sequence_loss([output], [flattened_labels],
                         [tf.ones_like(flattened_labels, dtype=tf.float32)])
    ### END YOUR CODE
    return loss

def sequence_loss(self, y_pred, y_true):
    '''
    Loss function for the seq2seq RNN. Reshape predicted and true (label)
    tensors, generate dummy weights, then use seq2seq.sequence_loss to actually
    compute the loss function.
    '''
    # print("my_sequence_loss y_pred=%s, y_true=%s" % (y_pred, y_true))
    # list of [-1, num_decoder_symbols] elements
    logits = tf.unpack(y_pred, axis=1)
    # y_true has shape [-1, self.out_seq_len]; unpack to list of self.out_seq_len [-1] elements
    targets = tf.unpack(y_true, axis=1)
    weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets]
    sl = seq2seq.sequence_loss(logits, targets, weights)
    return sl

def get_loss(self, seq_length):
    if not self.outputs.has_key(seq_length):
        self.get_outputs(seq_length)

    if not self.losses.has_key(seq_length):
        loss = sequence_loss(logits=self.outputs[seq_length],
                             targets=self.true_outputs[0:seq_length],
                             weights=[1] * seq_length,
                             average_across_timesteps=False,
                             average_across_batch=False,
                             softmax_loss_function=binary_cross_entropy_with_logits)
        self.losses[seq_length] = loss
    return self.losses[seq_length]

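# get_loss above delegates the per-step loss to a custom softmax_loss_function
# named binary_cross_entropy_with_logits that is defined elsewhere in that
# codebase. A plausible sketch of such a helper (an assumption, not the
# repository's actual code), treating the per-step outputs as sigmoid
# probabilities and the targets as 0/1 vectors:
def binary_cross_entropy_with_logits(logits, targets, eps=1e-12):
    # element-wise binary cross-entropy; sequence_loss reduces it afterwards
    return -(targets * tf.log(logits + eps)
             + (1.0 - targets) * tf.log(1.0 - logits + eps))
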
def sequence_loss(self, y_pred, y_true):
    '''
    Loss function for the seq2seq RNN. Reshape predicted and true (label)
    tensors, generate dummy weights, then use seq2seq.sequence_loss to actually
    compute the loss function.
    '''
    if self.verbose > 2:
        print("my_sequence_loss y_pred=%s, y_true=%s" % (y_pred, y_true))
    # list of [-1, num_decoder_symbols] elements
    logits = tf.unstack(y_pred, axis=1)
    # y_true has shape [-1, self.out_seq_len]; unpack to list of self.out_seq_len [-1] elements
    targets = tf.unstack(y_true, axis=1)
    if self.verbose > 2:
        print("my_sequence_loss logits=%s" % (logits,))
        print("my_sequence_loss targets=%s" % (targets,))
    weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets]
    if self.verbose > 4:
        print("my_sequence_loss weights=%s" % (weights,))
    sl = seq2seq.sequence_loss(logits, targets, weights)
    if self.verbose > 2:
        print("my_sequence_loss return = %s" % sl)
    return sl

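# Illustrative use of the wrapper above (the shapes are assumptions: y_pred
# holds [batch, out_seq_len, num_decoder_symbols] logits, y_true holds
# [batch, out_seq_len] integer ids, and `model` stands for whatever object
# defines the sequence_loss method):
y_pred = tf.placeholder(tf.float32, [None, 8, 256])
y_true = tf.placeholder(tf.int32, [None, 8])
loss = model.sequence_loss(y_pred, y_true)  # scalar average log-perplexity
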
def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    cross_entropy = sequence_loss(
        [output],
        [tf.reshape(self.labels_placeholder, [-1])],
        [tf.constant(1.0, shape=[self.config.batch_size * self.config.num_steps])])
    tf.add_to_collection("total_loss", cross_entropy)
    loss = tf.add_n(tf.get_collection("total_loss"))
    ### END YOUR CODE
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    # `output` holds the logits; weight every position equally.
    all_ones_weights = [tf.ones([self.config.batch_size * self.config.num_steps])]
    loss = sequence_loss([output],
                         [tf.reshape(self.labels_placeholder, [-1])],
                         all_ones_weights)  # , len(self.vocab)
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    flattened_labels = tf.reshape(self.labels_placeholder, [-1])
    weights = [tf.ones_like(flattened_labels, dtype=tf.float32)]
    cross_entropy = sequence_loss([output], [flattened_labels], weights)
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    ### END YOUR CODE
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    logits = [output]
    targets = [tf.reshape(self.labels_placeholder, [-1])]
    weights = [tf.ones(tf.shape(targets[0]))]
    loss = sequence_loss(logits, targets, weights, len(self.vocab))
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    labels = tf.reshape(self.labels_placeholder, [-1])
    # sequence_loss expects sparse integer targets, not one-hot vectors:
    # onehot = tf.one_hot(intlabels, len(self.vocab), 1, 0)
    targets = tf.to_int64(labels)
    weight_count = self.config.batch_size * self.config.num_steps
    weights = tf.ones([weight_count])
    loss = sequence_loss([output], [targets], [weights])
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.
    (See the TensorFlow source for tensorflow.python.ops.seq2seq.sequence_loss.)

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    # seq_loss = sequence_loss([output], [tf.reshape(self.labels_placeholder, [-1])], all_ones, len(self.vocab))
    seq_loss = sequence_loss([output],
                             [tf.reshape(self.labels_placeholder, [-1])],
                             all_ones)
    tf.add_to_collection("total_loss", seq_loss)
    loss = tf.add_n(tf.get_collection("total_loss"))
    ### END YOUR CODE
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    weights = tf.ones([self.config.batch_size, self.config.num_steps])
    output = tf.reshape(output,
                        [self.config.batch_size, self.config.num_steps, -1])
    loss = sequence_loss(output, self.labels_placeholder, weights)
    ## tf.add_to_collection('total_loss', cross_entropy)
    ## loss = tf.add_n(tf.get_collection('total_loss'))
    tf.scalar_summary('loss', loss)
    ### END YOUR CODE
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    comment: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ## Code Begin
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    cross_entropy = sequence_loss([output],
                                  [tf.reshape(self.labels_placeholder, [-1])],
                                  all_ones, len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    ## Code Ends
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    # sequence_loss returns the average log-perplexity per symbol (weighted).
    cross_entropy = sequence_loss([output],
                                  [tf.reshape(self.labels_placeholder, [-1])],
                                  all_ones, len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    ### END YOUR CODE
    return loss

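# Since sequence_loss returns the average per-symbol cross-entropy in nats (the
# log-perplexity), the corresponding perplexity can be recovered with an
# exponential. A small illustrative addition, not part of the snippet above:
perplexity = tf.exp(cross_entropy)
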
def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.
    Check https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py

    Args:
        output: A tensor of shape (None, self.vocab) (LIBIN: not used)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    # output shape  : [num_steps * (batch_size, len(self.vocab))]
    # targets shape : [num_steps * (batch_size, )]
    # weights shape : [num_steps * (batch_size, )]
    targets = [tf.squeeze(ts, [1])
               for ts in tf.split(1, self.config.num_steps, self.labels_placeholder)]
    weights = [tf.ones((self.config.batch_size, ))
               for step in xrange(self.config.num_steps)]
    loss = sequence_loss(output, targets, weights)
    ### END YOUR CODE
    return loss

def local_model_with_buckets(encoder_inputs, decoder_inputs, targets, weights,
                             buckets, seq2seq_f, softmax_loss_function=None,
                             name=None):
    if len(encoder_inputs) < buckets[-1][0]:
        raise ValueError("Length of encoder_inputs (%d) must be at least that of "
                         "last bucket (%d)." % (len(encoder_inputs), buckets[-1][0]))
    if len(targets) < buckets[-1][1]:
        raise ValueError("Length of targets (%d) must be at least that of last "
                         "bucket (%d)." % (len(targets), buckets[-1][1]))
    if len(weights) < buckets[-1][1]:
        raise ValueError("Length of weights (%d) must be at least that of last "
                         "bucket (%d)." % (len(weights), buckets[-1][1]))

    all_inputs = encoder_inputs + decoder_inputs + targets + weights
    losses = []
    outputs = []
    with ops.name_scope(name, "model_with_buckets", all_inputs):
        embeddings = embedding_utils.load_vocab()
        for j, bucket in enumerate(buckets):
            print("Preparing bucket", str(j), "...")
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(),
                    reuse=True if j > 0 else None):
                bucket_outputs, _ = seq2seq_f(encoder_inputs[:bucket[0]],
                                              decoder_inputs[:bucket[1]],
                                              embeddings)
                outputs.append(bucket_outputs)
                losses.append(
                    seq2seq.sequence_loss(
                        outputs[-1], targets[:bucket[1]], weights[:bucket[1]],
                        softmax_loss_function=softmax_loss_function))

    return outputs, losses

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    num_steps = self.config.num_steps
    batch_size = self.config.batch_size
    targets = tf.reshape(self.labels_placeholder, [-1])
    weights = tf.ones([batch_size * num_steps], dtype=tf.float32)
    cross_entropy = sequence_loss([output], [targets], [weights], len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    ### END YOUR CODE
    return loss

def model(encoder_inputs, decoder_inputs, targets, weights, encoder_input_length,
          list_of_mask, encoder_cell, decoder_cell, num_encoder_symbols,
          num_decoder_symbols, embedding_size, beam_size=1,
          output_projection=None, softmax_loss_function=None, dtype=None,
          name=None):
    all_inputs = encoder_inputs + decoder_inputs + targets + weights
    with ops.name_scope(name, "seq2seq_model", all_inputs):
        with variable_scope.variable_scope("model_seq2seq"):
            outputs, _, beams = embedding_attention_bidirectional_seq2seq(
                encoder_inputs, decoder_inputs, encoder_input_length,
                list_of_mask, encoder_cell, decoder_cell, num_encoder_symbols,
                num_decoder_symbols, embedding_size, beam_size=beam_size,
                output_projection=output_projection, dtype=dtype)
            loss = None
            if beam_size == 1:
                loss = seq2seq.sequence_loss(
                    outputs, targets, weights,
                    softmax_loss_function=softmax_loss_function)

    return outputs, loss, beams

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    # https://github.com/tensorflow/tensorflow/blob/13ea3ca91ba5aecab6f21acc14b9cb6a9afa8630/tensorflow/python/ops/seq2seq.py#L814
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    cross_entropy = sequence_loss([output],
                                  [tf.reshape(self.labels_placeholder, [-1])],
                                  all_ones, len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    # loss = sequence_loss(output, self.labels_placeholder, tf.ones(self.labels_placeholder.get_shape(), dtype=tf.float32))
    # loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, self.labels_placeholder))
    ### END YOUR CODE
    return loss

def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
        output: A tensor of shape (None, self.vocab)
    Returns:
        loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    weights = tf.ones(self.config.batch_size * self.config.num_steps)
    # might have to reshape outputs according to this op's prototype
    # loss = tf.contrib.seq2seq.sequence_loss(output, self.labels_placeholder, weights)
    print "In add_loss_op:"
    print output.get_shape()
    print self.labels_placeholder.get_shape()
    print weights.get_shape()
    print "---------------"
    lbls = tf.reshape(self.labels_placeholder, [-1])
    loss = sequence_loss([output], [lbls], [weights])
    ### END YOUR CODE
    return loss

def loss(self, predictions, rnn_outputs, labels):
    """Calculates the loss from the predictions (logits?) and the labels."""
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    cross_entropy = sequence_loss([predictions], [tf.reshape(labels, [-1])],
                                  all_ones, self.config.data_sets._len_vocab)
    tf.add_to_collection('total_loss', cross_entropy)

    with tf.variable_scope('Embedding_similarity'):
        num_steps = 7
        if self.config.num_steps != 1:
            h1 = [o1 * o2 for o1, o2 in zip(rnn_outputs[:-2], rnn_outputs[1:-1])]
            h2 = [o1 * o2 for o1, o2 in zip(rnn_outputs[1:-1], rnn_outputs[2:])]
            emb_sim = tf.reduce_mean(tf.exp(tf.square(tf.sub(h1, h2))))
        else:
            h1 = [rnn_outputs[0] * rnn_outputs[0] for i in range(num_steps - 2)]
            h2 = [rnn_outputs[0] * rnn_outputs[0] for i in range(num_steps - 2)]
            emb_sim = tf.reduce_mean(tf.exp(tf.square(tf.sub(h1, h2))))
        tf.add_to_collection('total_loss', emb_sim)

    loss = tf.add_n(tf.get_collection('total_loss'))
    return loss, cross_entropy, emb_sim

def add_loss_op(self, output):
    logits = [output]
    targets = [tf.reshape(self.labels_placeholder, [-1])]
    weights = [tf.ones((self.config.batch_size * self.config.num_steps, ))]
    loss = sequence_loss(logits, targets, weights)
    return loss

def build_model(self, forward_only, is_copy=True):
    print(" [*] Building a NTM model")

    with tf.variable_scope(self.scope):
        # present start symbol
        if is_copy:
            _, prev_state = self.cell(self.start_symbol, state=None)
            self.save_state(prev_state, 0, self.max_length)

        zeros = np.zeros(self.cell.input_dim, dtype=np.float32)

        tf.get_variable_scope().reuse_variables()
        for seq_length in xrange(1, self.max_length + 1):
            progress(seq_length / float(self.max_length))

            input_ = tf.placeholder(tf.float32, [self.cell.input_dim],
                                    name='input_%s' % seq_length)
            true_output = tf.placeholder(tf.float32, [self.cell.output_dim],
                                         name='true_output_%s' % seq_length)

            self.inputs.append(input_)
            self.true_outputs.append(true_output)

            # present inputs
            _, prev_state = self.cell(input_, prev_state)
            self.save_state(prev_state, seq_length, self.max_length)

            # present end symbol
            if is_copy:
                _, state = self.cell(self.end_symbol, prev_state)
                self.save_state(state, seq_length)

            self.prev_states[seq_length] = state

            if not forward_only:
                # present targets
                outputs = []
                for _ in xrange(seq_length):
                    output, state = self.cell(zeros, state)
                    self.save_state(state, seq_length, is_output=True)
                    outputs.append(output)

                self.outputs[seq_length] = outputs

        if not forward_only:
            for seq_length in xrange(self.min_length, self.max_length + 1):
                print(" [*] Building a loss model for seq_length %s" % seq_length)

                loss = sequence_loss(
                    logits=self.outputs[seq_length],
                    targets=self.true_outputs[0:seq_length],
                    weights=[1] * seq_length,
                    num_decoder_symbols=-1,  # unused by the custom loss function
                    average_across_timesteps=False,
                    average_across_batch=False,
                    softmax_loss_function=binary_cross_entropy_with_logits)

                self.losses[seq_length] = loss

                if not self.params:
                    self.params = tf.trainable_variables()

                # grads, norm = tf.clip_by_global_norm(
                #     tf.gradients(loss, self.params), 5)
                grads = []
                for grad in tf.gradients(loss, self.params):
                    if grad is not None:
                        grads.append(tf.clip_by_value(grad,
                                                      self.min_grad,
                                                      self.max_grad))
                    else:
                        grads.append(grad)

                self.grads[seq_length] = grads
                self.optims[seq_length] = self.opt.apply_gradients(
                    zip(grads, self.params),
                    global_step=self.global_step)

    self.saver = tf.train.Saver()
    print(" [*] Build a NTM model finished")

def __init__(self, vocab_size, batch_size, topology, cell_sizes, learning_rate,
             lr_decay_rate, max_gradient_norm, cell_type=BasicLSTMCell,
             embed=False, forward_only=False):
    self.emb_size = vocab_size
    self.batch_size = batch_size
    self.seq_sizes = topology
    self.n_layers = len(topology)
    self.cell_sizes = cell_sizes
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * lr_decay_rate)
    self.global_step = tf.Variable(0, trainable=False)

    self.seq_len = 1
    for seq_size in self.seq_sizes:
        self.seq_len *= seq_size

    self.enc_inputs = [tf.placeholder(tf.float32, [batch_size, self.emb_size],
                                      name='Encoder_Input_{}'.format(q))
                       for q in range(self.seq_len)]
    self.dec_inputs = []
    self.enc_cells = []
    self.dec_cells = []
    self.enc_scopes = []
    self.dec_scopes = []
    self.dec_data = []
    self.cell_type = cell_type

    # topology = [..., (layer_size, state_dim), ...]
    def build_layer(layer_size, input_size):
        enc_cell = self.cell_type(input_size)
        if layer_size > 1:
            enc_cell = [enc_cell]
            for _ in range(1, layer_size):
                enc_cell.append(self.cell_type(input_size, enc_cell[-1].output_size))
            enc_cell = MultiRNNCell(enc_cell)
        return enc_cell

    def build_inputs(seq_len, input_size):
        return [tf.placeholder(tf.float32, [self.batch_size, input_size])
                for _ in range(seq_len)]

    for i in range(0, self.n_layers):
        size = self.enc_cells[i - 1].state_size if i > 0 else self.emb_size
        cell = build_layer(self.cell_sizes[i], size)
        self.enc_cells.append(cell)
        self.enc_scopes.append('encoder_{}'.format(i))
        dec_input = build_inputs(self.seq_sizes[i], size)
        self.dec_cells.append(cell)
        self.dec_inputs.append(dec_input)
        self.dec_data.append([np.zeros((batch_size, self.dec_cells[i].input_size))
                              for _ in range(self.seq_sizes[i])])
        self.dec_scopes.append('decoder_{}'.format(i))

    self.dec_inputs = self.dec_inputs[::-1]
    self.dec_data = self.dec_data[::-1]
    self.dec_cells = self.dec_cells[::-1]

    if embed:
        self.enc_cells[0] = EmbeddingWrapper(self.enc_cells[0],
                                             self.emb_size, self.emb_size)
        self.enc_inputs = [tf.placeholder(tf.int32, [None],
                                          name='Encoder_Input_{}'.format(q))
                           for q in range(self.seq_len)]

    self.targets = [tf.placeholder(tf.int32, [None], name='Target_{}'.format(q))
                    for q in range(self.seq_len)]
    self.weights = [tf.placeholder(tf.float32, [None], name='Weights_{}'.format(q))
                    for q in range(self.seq_len)]

    self.encoder = self.hierarchical_encoder()
    self.logits = self.hierarchical_decoder(self.encoder)
    self.seq2seq = [tf.arg_max(x, 1) for x in self.logits]
    self.losses = seq2seq.sequence_loss(self.logits, self.targets, self.weights)

    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.AdadeltaOptimizer(self.learning_rate)
        gradients = tf.gradients(self.losses, params)
        clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                         max_gradient_norm)
        self.gradient_norm = norm
        self.updates = opt.apply_gradients(zip(clipped_gradients, params),
                                           global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())

def build_model(self, forward_only, is_copy=True):
    print(" [*] Building a NTM model")

    with tf.variable_scope(self.scope):
        # present start symbol
        if is_copy:
            _, prev_state = self.cell(self.start_symbol, state=None)
            self.save_state(prev_state, 0, self.max_length)

        zeros = np.zeros(self.cell.input_dim, dtype=np.float32)

        tf.get_variable_scope().reuse_variables()
        for seq_length in xrange(1, self.max_length + 1):
            progress(seq_length / float(self.max_length))

            input_ = tf.placeholder(tf.float32, [self.cell.input_dim],
                                    name='input_%s' % seq_length)
            true_output = tf.placeholder(tf.float32, [self.cell.output_dim],
                                         name='true_output_%s' % seq_length)

            self.inputs.append(input_)
            self.true_outputs.append(true_output)

            # present inputs
            _, prev_state = self.cell(input_, prev_state)
            self.save_state(prev_state, seq_length, self.max_length)

            # present end symbol
            if is_copy:
                _, state = self.cell(self.end_symbol, prev_state)
                self.save_state(state, seq_length)

            self.prev_states[seq_length] = state

            if not forward_only:
                # present targets
                outputs = []
                for _ in xrange(seq_length):
                    output, state = self.cell(zeros, state)
                    self.save_state(state, seq_length, is_output=True)
                    outputs.append(output)

                self.outputs[seq_length] = outputs

        if not forward_only:
            for seq_length in xrange(self.min_length, self.max_length + 1):
                print(" [*] Building a loss model for seq_length %s" % seq_length)

                loss = sequence_loss(
                    logits=self.outputs[seq_length],
                    targets=self.true_outputs[0:seq_length],
                    weights=[1] * seq_length,
                    average_across_timesteps=False,
                    average_across_batch=False,
                    softmax_loss_function=binary_cross_entropy_with_logits)

                self.losses[seq_length] = loss

                if not self.params:
                    self.params = tf.trainable_variables()

                # grads, norm = tf.clip_by_global_norm(
                #     tf.gradients(loss, self.params), 5)
                grads = []
                for grad in tf.gradients(loss, self.params):
                    if grad is not None:
                        grads.append(tf.clip_by_value(grad,
                                                      self.min_grad,
                                                      self.max_grad))
                    else:
                        grads.append(grad)

                self.grads[seq_length] = grads
                self.optims[seq_length] = self.opt.apply_gradients(
                    zip(grads, self.params),
                    global_step=self.global_step)

    self.saver = tf.train.Saver()
    print(" [*] Build a NTM model finished")

def __init__(self, vocab_size, size, num_layers, max_gradient_norm, batch_size,
             learning_rate, learning_rate_decay_factor, num_samples=512,
             forward_only=False, max_dialog_length=10, max_answer_length=20):
    self.vocab_size = vocab_size
    self.batch_size = batch_size
    self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
    self.learning_rate_decay_op = self.learning_rate.assign(
        self.learning_rate * learning_rate_decay_factor)
    self.global_step = tf.Variable(0, trainable=False)
    self.max_dialog_length = max_dialog_length
    self.max_answer_length = max_answer_length

    # If we use sampled softmax, we need an output projection.
    output_projection = None
    softmax_loss_function = None
    # Sampled softmax only makes sense if we sample less than vocabulary size.
    if num_samples > 0 and num_samples < self.vocab_size:
        with tf.device("/cpu:0"):
            w = tf.get_variable("proj_w", [size, self.vocab_size])
            w_t = tf.transpose(w)
            b = tf.get_variable("proj_b", [self.vocab_size])
        output_projection = (w, b)

        def sampled_loss(inputs, labels):
            with tf.device("/cpu:0"):
                labels = tf.reshape(labels, [-1, 1])
                return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels,
                                                  num_samples, self.vocab_size)
        softmax_loss_function = sampled_loss

    # Create the internal multi-layer cell for our RNN.
    single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
    cell = single_cell
    if num_layers > 1:
        cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)

    # The seq2seq function: we use embedding for the input and attention.
    def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
        return dialog_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                        vocab_size,
                                        output_projection=output_projection,
                                        feed_previous=do_decode)

    # Feeds for inputs.
    self.encoder_inputs = []
    self.decoder_inputs = []
    self.target_weights = []
    for i in range(0, max_dialog_length):
        one_turn_encoder_inputs = []
        one_turn_decoder_inputs = []
        one_turn_target_weights = []
        for j in range(0, max_answer_length):
            one_turn_encoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None],
                               name="encoder{0}_{1}".format(i, j)))
        for j in range(0, max_answer_length + 1):
            one_turn_decoder_inputs.append(
                tf.placeholder(tf.int32, shape=[None],
                               name="decoder{0}_{1}".format(i, j)))
            one_turn_target_weights.append(
                tf.placeholder(tf.float32, shape=[None],
                               name="weight{0}_{1}".format(i, j)))
        self.encoder_inputs.append(list(one_turn_encoder_inputs))
        self.decoder_inputs.append(list(one_turn_decoder_inputs))
        self.target_weights.append(list(one_turn_target_weights))

    # Our targets are decoder inputs shifted by one.
    targets = []
    for i in range(0, max_dialog_length):
        targets.append([self.decoder_inputs[i][j + 1]
                        for j in xrange(len(self.decoder_inputs[i]) - 1)])

    # Training outputs and losses.
    if forward_only:
        self.outputs, _ = seq2seq_f(self.encoder_inputs, self.decoder_inputs, True)
        self.loss = 0
        for i in range(0, max_dialog_length):
            self.loss += sequence_loss(self.outputs[i][:-1], targets[i],
                                       self.target_weights[i][:-1],
                                       softmax_loss_function=softmax_loss_function)
        # If we use output projection, we need to project outputs for decoding.
        if output_projection is not None:
            self.outputs = tf.matmul(self.outputs,
                                     output_projection[0]) + output_projection[1]
    else:
        self.outputs, _ = seq2seq_f(self.encoder_inputs, self.decoder_inputs, False)
        self.loss = 0
        for i in range(0, max_dialog_length):
            self.loss += sequence_loss(self.outputs[i][:-1], targets[i],
                                       self.target_weights[i][:-1],
                                       softmax_loss_function=softmax_loss_function)

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
        opt = tf.train.GradientDescentOptimizer(self.learning_rate)
        gradients = tf.gradients(self.loss, params)
        clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(
            gradients, max_gradient_norm)
        self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                          global_step=self.global_step)

    self.saver = tf.train.Saver(tf.all_variables())

    # (continues the preceding `with ... as scope:` block from an earlier cell)
    scope.reuse_variables()
    decode_outputs_test, decode_state_test = seq2seq.embedding_attention_seq2seq(
        encode_input, decode_input, stacked_lstm, vocab_size, vocab_size,
        num_hidden, feed_previous=True)

# In[6]:

with tf.name_scope('loss'):
    loss_weights = [tf.ones_like(l, dtype=tf.float32) for l in labels]
    loss = seq2seq.sequence_loss(decode_outputs, labels, loss_weights, vocab_size)
    tf.scalar_summary('loss', loss)

# In[7]:

optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)

# In[8]:

init = tf.initialize_all_variables()
saver = tf.train.Saver()
sess = tf.InteractiveSession()
merged = tf.merge_all_summaries()

def sequence_loss(y_pred, y_true):
    logits = tf.unpack(y_pred, axis=1)
    targets = tf.unpack(y_true, axis=1)
    weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets]
    return seq2seq.sequence_loss(logits, targets, weights)

label_rnn_initial_state = label_lstm_cell.zero_state(label_batch_size, tf.float32)
label_rnn_outputs, label_rnn_states = rnn.rnn(label_lstm_cell, label_rnn_inputs,
                                              initial_state=label_rnn_initial_state,
                                              scope="RNN2")
# n_label_rnn_steps * (n_batch_size, n_classes)
label_rnn_outputs = [tf.matmul(lro, w_label_out) + b_label_out
                     for lro in label_rnn_outputs]

# (n_label_rnn_steps, n_batch_size, n_classes)
label_rnn_predicted_index_labels = tf.pack(label_rnn_outputs)
# (n_batch_size, n_label_rnn_steps, n_classes)
label_rnn_predicted_index_labels = tf.transpose(label_rnn_predicted_index_labels,
                                                [1, 0, 2])
# label_rnn_predicted_index_labels = tf.concat(0, label_rnn_outputs)  # (n_label_rnn_steps*n_batch_size, n_classes)
# label_rnn_predicted_index_labels = tf.reshape(label_rnn_predicted_index_labels, [-1, n_label_rnn_steps, n_classes])  # (n_batch_size, n_label_rnn_steps, n_classes)
# (n_batch_size, n_label_rnn_steps)
label_rnn_predicted_index_labels = tf.argmax(label_rnn_predicted_index_labels, 2)

# Optimization
# cost = tf.nn.sparse_softmax_cross_entropy_with_logits(label_rnn_predicted_data, label_rnn_target_data)
sequence_loss_weights = [tf.ones(tf.shape(label_rnn_target_outputs[0]))] * n_label_rnn_steps
cost = sequence_loss(label_rnn_outputs, label_rnn_target_outputs, sequence_loss_weights)
# cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_pred, y))  # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)  # Adam Optimizer

# correct_pred = tf.equal(tf.argmax(y_pred, 1), tf.argmax(y, 1))
# accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# EXECUTION
# Launch the graph
with tf.Session() as sess:
    sess.run(init)

def add_loss_op(self, output):
    logits = [output]
    targets = [tf.reshape(self.labels_placeholder, [-1])]
    weights = [tf.ones((self.config.batch_size * self.config.num_steps,))]
    loss = sequence_loss(logits, targets, weights)
    return loss

def __init__(self, args, infer=False):
    self.args = args
    if infer:
        self.batch_size = 1
        self.seq_length = 1
    else:
        self.batch_size = args.batch_size
        self.seq_length = args.seq_length

    if args.model == 'rnn':
        cell_fn = rnn_cell.BasicRNNCell
    elif args.model == 'gru':
        cell_fn = rnn_cell.GRUCell
    elif args.model == 'lstm':
        cell_fn = rnn_cell.BasicLSTMCell
    elif args.model == 'dropgru' or args.model == 'droprnn':
        pass
    else:
        raise Exception("model type not supported: {}".format(args.model))

    if args.model.startswith('drop'):
        cells = []
        dt1 = DropoutBasicRNNCell
        dt2 = DropoutGRUCell
        if args.model != 'dropgru':
            print("additional layers will be basic RNN")
            dt2 = DropoutBasicRNNCell
        for ii in range(args.num_layers):
            if False and args.learn_input_embedding:
                # context-dependent embedding learned as a small RNN before the large GRUs
                args.learn_input_embedding = False
                if ii == 0:
                    nc = dt1(args.vocab_size, input_size=args.vocab_size,
                             probofdrop_st=args.dropout, probofdrop_in=0.0)
                elif ii == 1:
                    nc = dt2(args.rnn_size, input_size=args.vocab_size,
                             probofdrop_st=args.dropout, probofdrop_in=args.dropout)
                else:
                    nc = dt2(args.rnn_size, input_size=args.rnn_size,
                             probofdrop_st=args.dropout, probofdrop_in=args.dropout)
            else:
                # embedding is fixed, context-independent; like word vectors
                firstdroprate = 0.0
                if args.learn_input_embedding:
                    firstdroprate = args.dropout
                if ii == 0:
                    nc = dt2(args.rnn_size, input_size=args.vocab_size,
                             probofdrop_st=args.dropout, probofdrop_in=firstdroprate)
                else:
                    nc = dt2(args.rnn_size, input_size=args.rnn_size,
                             probofdrop_st=args.dropout, probofdrop_in=args.dropout)
            cells.append(nc)
        self.cell = rnn_cell.MultiRNNCell(cells)
        self.cellusesdropout = True
    else:
        print("building basic non-dropout model")
        c1 = cell_fn(args.rnn_size)
        self.cell = rnn_cell.MultiRNNCell([c1] * args.num_layers)
        self.cellusesdropout = False

    self.input_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length],
                                     name="x_input_data")
    self.targets = tf.placeholder(tf.int32, [self.batch_size, self.seq_length],
                                  name="y_targets")
    self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

    if args.learn_input_embedding:
        self.embedding = tf.get_variable("embedding", [args.vocab_size, args.vocab_size])
    else:
        self.embedding = tf.placeholder(tf.float32, [args.vocab_size, args.vocab_size],
                                        name="embedding")

    if self.cellusesdropout:
        self._dropMaskOutput = tf.placeholder(
            dtype=tf.float32,
            shape=[self.batch_size * self.seq_length, args.rnn_size],
            name="dropout_output_mask")
        self._latest_mask_output = None

    with tf.variable_scope('rnnlm'):
        softmax_w = tf.get_variable("top_softmax_w", [args.rnn_size, args.vocab_size])
        softmax_b = tf.get_variable("top_softmax_b", [args.vocab_size])
        inputs = tf.split(1, self.seq_length,
                          tf.nn.embedding_lookup(self.embedding, self.input_data))
        inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

    def loop(prev, _):
        if self.cellusesdropout:
            assert prev.get_shape() == self._dropMaskOutput.get_shape()
            prev = tf.matmul(tf.mul(prev, self._dropMaskOutput), softmax_w) + softmax_b
        else:
            prev = tf.matmul(prev, softmax_w) + softmax_b
        prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
        return tf.nn.embedding_lookup(self.embedding, prev_symbol)

    self.temperature = tf.placeholder(tf.float32, 1, name="temperature")

    # if loop_function is not None, it is used to generate the next input;
    # otherwise, if it is None, the next input will be from the "inputs" sequence
    outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, self.cell,
                                              loop_function=loop if infer else None,
                                              scope='rnnlm')
    output = tf.reshape(tf.concat(1, outputs),
                        [self.batch_size * self.seq_length, args.rnn_size])
    if self.cellusesdropout:
        assert output.get_shape() == self._dropMaskOutput.get_shape()
        self.logits = tf.matmul(tf.mul(output, self._dropMaskOutput), softmax_w) + softmax_b
    else:
        self.logits = tf.matmul(output, softmax_w) + softmax_b

    self.probs = tf.nn.softmax(self.logits)
    self.probswithtemp = tf.nn.softmax(self.logits / self.temperature)

    # 1.44... term converts cost from units of "nats" to units of "bits"
    self.cost = seq2seq.sequence_loss(
        [self.logits],
        [tf.reshape(self.targets, [-1])],
        [tf.ones([self.batch_size * self.seq_length])]) * 1.44269504088896340736
    self.pred_entropy = tf.reduce_sum(
        tf.mul(self.probs, tf.log(self.probs + 1e-12)), 1) * (-1.44269504088896340736)

    self.final_state = last_state
    self.lr = tf.Variable(0.0, trainable=False, name="learningrate")
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), args.grad_clip)
    optimizer = tf.train.AdamOptimizer(self.lr)
    zipgradvars = zip(grads, tvars)
    self.train_op = optimizer.apply_gradients(zipgradvars)

    # for tensorboard
    tb_cost = tf.scalar_summary('cost_train', self.cost)
    tb_predent = tf.scalar_summary('prediction_entropy_train',
                                   tf.reduce_mean(self.pred_entropy))
    mergethese = [tb_cost, tb_predent]
    for grad, var in zipgradvars:
        mergethese.append(tf.histogram_summary(var.name + '_value', var))
        mergethese.append(tf.histogram_summary(var.name + '_grad', grad))
    self.tbsummary = tf.merge_summary(mergethese)

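# The magic constant above is log2(e) = 1/ln(2): multiplying a natural-log
# cross-entropy (in nats) by it re-expresses the cost in bits per character.
# A quick self-contained check of that value:
import math
assert abs(1.0 / math.log(2) - 1.44269504088896340736) < 1e-12
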
def add_loss_op(self, output):
    # Compute the loss function: cross-entropy averaged over all positions.
    loss = sequence_loss([output],
                         [tf.reshape(self.labels_placeholder, [-1])],
                         [tf.ones([self.config.batch_size * self.config.num_steps])])
    return loss