def test3():
    hiddenDim = 3
    classes = 2
    vocab = Vocab()
    vocab.construct(["i", "hate", "cat", "fur"])
    tree = tr.Tree("(0 (0 I) (1 (1 hate) (1 (1 cat) (0 (0 cat) (0 fur)))))")
    rnn = RecursiveNeuralNet(hiddenDim, classes, vocab)
    rnn.initialize_matrices(
        W=np.matrix([[1.0, -1.0, 0.5, -0.6, 0.2, -0.8],
                     [-0.3, 1.2, 2.0, 0.4, -0.4, 0.2],
                     [-0.8, 0.9, 1.1, 1.0, -2.0, 0.1]], dtype=np.float32),
        b=np.matrix([[-0.4], [0.3], [0.4]], dtype=np.float32),
        Ws=np.matrix([[0.2, 0.1, -0.5], [1.2, -0.9, 0.3]], dtype=np.float32),
        bs=np.matrix([[-0.4], [0.5]], dtype=np.float32),
        L=np.matrix([[0.4, -0.3, -0.1],
                     [0.6, -0.3, 0.4],
                     [0.04, -0.08, 1.25],
                     [0.2, 0.7, 0.6],
                     [0.2, 0.7, 0.6]], dtype=np.float32))
    result = rnn.forward_prop(tree.root)
    rnn.backward_prop(tree.root)
    return {'dW': rnn.dW, 'db': rnn.db, 'dWs': rnn.dWs, 'dbs': rnn.dbs}
def test1():
    hiddenDim = 3
    classes = 2
    vocab = Vocab()
    vocab.construct(["i", "love", "apple", "juice"])
    tree = tr.Tree("(1 (0 I) (1 (1 love) (1 (1 love) (0 (0 apple) (0 juice)))))")
    rnn = RecursiveNeuralNet(hiddenDim, classes, vocab)
    rnn.initialize_matrices(
        W=np.matrix([[1.0, 2.0, 0.0, -0.4, 0.2, -0.8],
                     [-0.5, 1.0, 2.0, 0.0, -0.4, 0.2],
                     [-0.6, 0.9, 1.1, 1.0, -2.0, 0.0]], dtype=np.float32),
        b=np.matrix([[-0.4], [0.5], [0.2]], dtype=np.float32),
        Ws=np.matrix([[0.0, 0.1, -0.2], [1.4, -0.7, 0.1]], dtype=np.float32),
        bs=np.matrix([[-0.1], [0.4]], dtype=np.float32),
        L=np.matrix([[0.4, -0.3, -0.1],
                     [0.1, 0.1, 0.2],
                     [0.04, -0.9, 1.2],
                     [0.2, 0.5, 0.6],
                     [0.2, 0.5, 0.6]], dtype=np.float32))
    result = rnn.forward_prop(tree.root)
    rnn.backward_prop(tree.root)
    return {'dW': rnn.dW, 'db': rnn.db, 'dWs': rnn.dWs, 'dbs': rnn.dbs}
def test2():
    hiddenDim = 3
    classes = 2
    vocab = Vocab()
    vocab.construct(["i", "am", "a", "an", "student"])
    tree = tr.Tree("(0 (0 I) (1 (1 am) (0 (0 a) (0 student))))")
    rnn = RecursiveNeuralNet(hiddenDim, classes, vocab)
    rnn.initialize_matrices(
        W=np.matrix([[1.0, -1.0, 0.9, -0.6, 0.2, 0.0],
                     [-0.3, 1.2, 0.0, 0.4, -0.4, 0.0],
                     [-0.8, 0.1, 1.1, 0.0, -2.0, 0.0]], dtype=np.float32),
        b=np.matrix([[-0.4], [0.0], [0.0]], dtype=np.float32),
        Ws=np.matrix([[0.0, 0.0, -0.5], [1.0, -0.9, 0.0]], dtype=np.float32),
        bs=np.matrix([[-0.4], [0.0]], dtype=np.float32),
        L=np.matrix([[0.4, -0.3, -0.1],
                     [0.6, -0.3, 0.4],
                     [0.04, -0.08, 1.25],
                     [0.2, 0.5, 0.6],
                     [0.1, 0.7, 0.6],
                     [0.1, 0.3, 0.0]], dtype=np.float32))
    result = rnn.forward_prop(tree.root)
    rnn.backward_prop(tree.root)
    return {'dW': rnn.dW, 'db': rnn.db, 'dWs': rnn.dWs, 'dbs': rnn.dbs}
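# The three tests above return the analytic gradients computed by backward_prop
# (dW, db, dWs, dbs).  A standard way to validate such gradients is a centered
# finite-difference check.  Below is a minimal, self-contained numpy sketch of
# that check; `loss_fn` and `param` are hypothetical names and it is not wired
# into the RecursiveNeuralNet API above.
import numpy as np

def numerical_gradient(loss_fn, param, eps=1e-5):
    """Centered finite-difference gradient of loss_fn() with respect to param."""
    grad = np.zeros_like(param)
    it = np.nditer(param, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        old = param[idx]
        param[idx] = old + eps
        loss_plus = loss_fn()
        param[idx] = old - eps
        loss_minus = loss_fn()
        param[idx] = old                      # restore original value
        grad[idx] = (loss_plus - loss_minus) / (2 * eps)
        it.iternext()
    return grad

# Toy sanity check: for loss = 0.5 * ||W||^2 the gradient is W itself.
W_toy = np.random.randn(3, 6)
assert np.allclose(numerical_gradient(lambda: 0.5 * np.sum(W_toy ** 2), W_toy),
                   W_toy, atol=1e-4)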
class WhoseLineModel(object):

    def __init__(self, config):
        self.config = config
        self.load_data(debug=False)
        self.add_common_model_vars()

    def load_data(self, debug=False):
        self.wordvecs = gensim.models.Word2Vec.load_word2vec_format(
            self.config.wordvecpath, binary=False)
        self.vocab = Vocab()
        self.vocab.construct(self.wordvecs.index2word)
        self.embedding_matrix = np.vstack([
            self.wordvecs[self.vocab.index_to_word[i]]
            for i in range(len(self.vocab))
        ])
        # next line is "unk" surgery cf. https://groups.google.com/forum/#!searchin/globalvectors/unknown/globalvectors/9w8ZADXJclA/X6f0FgxUnMgJ
        self.embedding_matrix[0, :] = np.mean(self.embedding_matrix, axis=0)
        chapter_split = load_chapter_split(self.config.datasplitpath)
        self.speakers = Speakers()
        for line in open(self.config.datapath):
            ch, speaker, line = line.split("\t")
            if chapter_split[ch] == 0:
                self.speakers.add_speaker(speaker)
        self.speakers.prune(self.config.speaker_count - 1)  # -1 for OTHER
        self.train_data = []
        self.dev_data = []
        self.test_data = []
        oldch = None
        for ln in open(self.config.datapath):
            ch, speaker, line = ln.split("\t")
            encoded_line = (np.array(
                [self.vocab.encode(word) for word in line.split()],
                dtype=np.int32), self.speakers.encode(speaker))
            if chapter_split[ch] == 0:
                dataset = self.train_data
            elif chapter_split[ch] == 1:
                dataset = self.dev_data
            else:
                dataset = self.test_data
            if self.config.batch_size == "chapter":
                if ch == oldch:
                    dataset[-1].append(encoded_line)
                else:
                    dataset.append([encoded_line])
            else:
                dataset.append(encoded_line)
            oldch = ch

    def add_common_model_vars(self):
        with tf.variable_scope("word_vectors"):
            self.tf_embedding_matrix = tf.constant(self.embedding_matrix,
                                                   name="embedding")
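# The "unk" surgery above in isolation: row 0 of the embedding matrix (the
# unknown-word slot) is overwritten with the mean of all word vectors, so
# out-of-vocabulary words get a central embedding rather than whatever gensim
# happened to store first.  Toy numpy illustration with made-up values:
import numpy as np
emb = np.arange(12, dtype=np.float32).reshape(4, 3)   # 4 "words", 3 dimensions
emb[0, :] = np.mean(emb, axis=0)
print emb[0]   # [ 4.5  5.5  6.5 ] -- the column means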
def load_data(debug=False):
    """Loads starter word-vectors and train/dev/test data."""
    vocab = Vocab()
    vocab.construct(get_ptb_dataset('train'))
    encoded_train = np.array(
        [vocab.encode(word) for word in get_ptb_dataset('train')],
        dtype=np.int32)
    encoded_valid = np.array(
        [vocab.encode(word) for word in get_ptb_dataset('valid')],
        dtype=np.int32)
    encoded_test = np.array(
        [vocab.encode(word) for word in get_ptb_dataset('test')],
        dtype=np.int32)
    if debug:
        num_debug = 1024
        encoded_train = encoded_train[:num_debug]
        encoded_valid = encoded_valid[:num_debug]
        encoded_test = encoded_test[:num_debug]
    return encoded_train, encoded_valid, encoded_test, vocab
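# Usage sketch for load_data (assumes the course-provided `utils` module and
# the PTB data files are available, as in the imports elsewhere in this file;
# the printed numbers are illustrative, not expected values).
if __name__ == '__main__':
    encoded_train, encoded_valid, encoded_test, vocab = load_data(debug=True)
    print 'train tokens: %d, vocab size: %d' % (len(encoded_train), len(vocab))
    # Decode the first ten training tokens back into words.
    print ' '.join(vocab.index_to_word[i] for i in encoded_train[:10])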
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder(tf.int32, shape=[None, self.config.num_steps], name='Input') self.labels_placeholder = tf.placeholder(tf.float32, shape=[None, self.config.num_steps], name='Target') self.dropout_placeholder = tf.placeholder(tf.int64, name='Dropout') ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE embeddings = tf.get_variable('Embedding', [len(self.vocab), self.config.embed_size], trainable=True) inputs = tf.nn.embedding_lookup(embeddings, self.input_placeholder) inputs = [tf.squeeze(x, [1]) for x in tf.split(1, self.config.num_steps, inputs)] ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). 
Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE with tf.name_scope('Projection Layer'): U = tf.get_variable('U', [self.config.hidden_size, len(self.vocab)]) b2 = tf.get_variable('b2', len(self.vocab)) outputs = [tf.nn.softmax(tf.matmul(o,U)+b2) for o in rnn_outputs] ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])] cross_entropy = sequence_loss([output], [tf.reshape(self.labels_placeholder,[-1])], all_ones, len(self.vocab)) tf.add_to_collection('total_loss', cross_entropy) loss = tf.add_n(tf.get_collection('total_loss')) ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE optimizer = tf.train.AdamOptimizer(self.config.lr) train_op = optimizer.minimize(loss) ### END YOUR CODE return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs] # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss) def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) 
Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE with tf.variable_scope('InputDropout'): inputs = [tf.nn.dropout(x, self.dropout_placeholder) for x in inputs] with tf.variable_scope('RNN') as scope: self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size]) state = self.initial_state rnn_outputs = [] for tstep, current_input in enumerate(inputs): if tstep > 0: scope.reuse_variables() H = tf.get_variable('H', [self.config.hidden_size, self.config.hidden_size]) I = tf.get_variable('I', [self.config.embed_size, self.config.hidden_size]) b1 = tf.get_variable('b1', [self.config.hidden_size]) state = tf.nn.sigmoid(tf.matmul(state, H) + tf.matmul(current_input, I) + b1) rnn_outputs.append(state) self.final_state = rnn_outputs[-1] with tf.variable_scope('RNNDropout'): rnn_outputs = [tf.nn.dropout(x, self.dropout_placeholder) for x in rnn_outputs] ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1.0 total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = {self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp} loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
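# Shape sketch for the minibatches consumed by run_epoch above.  ptb_iterator
# (imported from utils alongside the other data helpers) yields (x, y) pairs of
# shape (batch_size, num_steps), where y holds the next-word targets for x.
# This uses the encoded_train array returned by load_data above; the concrete
# batch_size/num_steps values are illustrative, not the assignment's Config.
for x, y in ptb_iterator(encoded_train, 64, 10):
    print 'input batch:', x.shape    # (64, 10) int32 word indices
    print 'target batch:', y.shape   # (64, 10) next-word indices
    break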
import sys
import time

import numpy as np
from copy import deepcopy

from utils import calculate_perplexity, get_ptb_dataset, Vocab
from utils import ptb_iterator, sample
from model import LanguageModel

import tensorflow as tf
from tensorflow.contrib.seq2seq import sequence_loss

debug = True

"""Loads starter word-vectors and train/dev/test data."""
vocab = Vocab()
vocab.construct(get_ptb_dataset('train'))
encoded_train = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('train')],
    dtype=np.int32)
encoded_valid = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('valid')],
    dtype=np.int32)
encoded_test = np.array(
    [vocab.encode(word) for word in get_ptb_dataset('test')],
    dtype=np.int32)
if debug:
    num_debug = 1024
    encoded_train = encoded_train[:num_debug]
    encoded_valid = encoded_valid[:num_debug]
    encoded_test = encoded_test[:num_debug]
print '****** LOADED DATA'

'''**********************************************************************************************************'''
# Hyper Parameters
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder(tf.int32, (None, self.config.num_steps)) self.labels_placeholder = tf.placeholder( tf.int32, (None, self.config.num_steps )) # Please note that float32 is mentioned but I used int32 self.dropout_placeholder = tf.placeholder(tf.float32, ()) ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE np.random.seed(8) embeddings = tf.Variable( tf.random_uniform((len(self.vocab), self.config.embed_size), -1.0, 1.0, seed=8)) embed = tf.nn.embedding_lookup(embeddings, self.input_placeholder) # print '***** EEEEE:',embed inputs = [ tf.squeeze(i) for i in tf.split(embed, self.config.num_steps, 1) ] # print '***** INPUTS:',inputs ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). 
Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE outputs = [] np.random.seed(8) init = tf.random_normal_initializer(seed=8) with tf.variable_scope('RNN-LM') as scope: scope.reuse_variables() U = tf.get_variable('U', (self.config.hidden_size, len(self.vocab)), tf.float32, init) b_2 = tf.get_variable('bias-2', (1, len(self.vocab)), tf.float32, init) for t in xrange(self.config.num_steps): self.current_state = rnn_outputs[t] # out = tf.nn.softmax(tf.matmul(self.current_state,U)+b_2) out = tf.matmul(self.current_state, U) + b_2 outputs.append(out) ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE # loss = sequence_loss( ) #print output output = tf.reshape(output, shape=(self.config.batch_size, self.config.num_steps, len(self.vocab))) output = tf.convert_to_tensor(output) seq_weight = tf.ones((self.config.num_steps, self.config.batch_size)) loss = sequence_loss(output, self.labels_placeholder, seq_weight) ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE # train_op = tf.train.GradientDescentOptimizer(self.config.lr).minimize(loss) # with tf.variable_scope(tf.get_variable_scope()) as vscope:# Extra added by me on the startar code # tf.get_variable_scope().reuse_variables()# Extra added by me to the starter code # with tf.variable_scope('train',reuse = False) as scope: # train_op = tf.train.AdadeltaOptimizer(self.config.lr).minimize(loss) ### END YOUR CODE return train_op def __init__(self, config): self.config = config self.load_data(debug=True) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [ tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs ] # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide self.output = tf.reshape(tf.concat(self.outputs, 1), [-1, len(self.vocab)]) # self.calculate_loss = self.add_loss_op(output) # self.train_step = self.add_training_op(self.calculate_loss) def setter(self, loss, train_op): self.train_step = train_op self.calculate_loss = loss def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. 
Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE rnn_outputs = [] np.random.seed(8) init = tf.random_normal_initializer(seed=8) with tf.variable_scope('RNN-LM') as scope: self.initial_state = tf.zeros(shape=(self.config.batch_size, self.config.hidden_size)) H = tf.get_variable( 'H', (self.config.hidden_size, self.config.hidden_size), tf.float32, init) I = tf.get_variable( 'I', (self.config.embed_size, self.config.hidden_size), tf.float32, init) b_1 = tf.get_variable('bias-1', (1, self.config.hidden_size), tf.float32, init) U = tf.get_variable('U', (self.config.hidden_size, len(self.vocab)), tf.float32, init) b_2 = tf.get_variable('bias-2', (1, len(self.vocab)), tf.float32, init) with tf.variable_scope('RNN-LM', reuse=True) as scope: # scope.reuse_variables() self.current_state = self.initial_state H = tf.get_variable( 'H', (self.config.hidden_size, self.config.hidden_size), tf.float32, init) I = tf.get_variable( 'I', (self.config.embed_size, self.config.hidden_size), tf.float32, init) b_1 = tf.get_variable('bias-1', (1, self.config.hidden_size), tf.float32, init) U = tf.get_variable('U', (self.config.hidden_size, len(self.vocab)), tf.float32, init) b_2 = tf.get_variable('bias-2', (1, len(self.vocab)), tf.float32, init) # print '***** inputs[0]',inputs[0] for t in xrange(self.config.num_steps): self.current_state = tf.sigmoid( tf.matmul(self.current_state, H) + tf.matmul(inputs[t], I) + b_1) rnn_outputs.append(self.current_state) self.final_state = rnn_outputs[-1] ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum( 1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = { self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp } loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
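# The recurrence that add_model implements above, restated in plain numpy so
# the shapes are explicit (a standalone sketch with illustrative sizes, not
# part of the TensorFlow graph): state_t = sigmoid(state_{t-1} H + x_t I + b_1).
import numpy as np

def rnn_forward_numpy(inputs, H, I, b_1):
    """inputs: list of (batch_size, embed_size) arrays -> list of (batch_size, hidden_size)."""
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    state = np.zeros((inputs[0].shape[0], H.shape[0]))   # the initial_state
    rnn_outputs = []
    for x_t in inputs:                                   # one iteration per time step
        state = sigmoid(state.dot(H) + x_t.dot(I) + b_1)
        rnn_outputs.append(state)
    return rnn_outputs                                   # rnn_outputs[-1] is final_state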
class RNN_Model(): def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def inference(self, tree, predict_only_root=False): """For a given tree build the RNN models computation graph up to where it may be used for inference. Args: tree: a Tree object on which to build the computation graph for the RNN Returns: softmax_linear: Output tensor with the computed logits. """ node_tensors = self.add_model(tree.root) if predict_only_root: node_tensors = node_tensors[tree.root] else: node_tensors = [tensor for node, tensor in node_tensors.iteritems() if node.label!=2] node_tensors = tf.concat(0, node_tensors) return self.add_projections(node_tensors) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' with tf.variable_scope('Composition'): ### YOUR CODE HERE embed_size = self.config.embed_size #epsilon = 0.4 #initializer = tf.random_uniform_initializer(-epsilon, epsilon) initializer = None embedding = tf.get_variable('embedding', [len(self.vocab), self.config.embed_size], initializer=initializer) W1 = tf.get_variable("W1", [2 * embed_size, embed_size], initializer=initializer) b1 = tf.get_variable("b1", [1, embed_size], initializer=initializer) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE U = tf.get_variable("U", [embed_size, self.config.label_size], initializer=initializer) bs = tf.get_variable("bs", [1, self.config.label_size], initializer=initializer) ### END YOUR CODE def add_model(self, node): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by it's parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). 
Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ with tf.variable_scope('Composition', reuse=True): ### YOUR CODE HERE embedding = tf.get_variable("embedding") W1 = tf.get_variable("W1") b1 = tf.get_variable("b1") ### END YOUR CODE node_tensors = OrderedDict() curr_node_tensor = None if node.isLeaf: ### YOUR CODE HERE curr_node_tensor = tf.gather(embedding, [self.vocab.encode(node.word)]) ### END YOUR CODE else: node_tensors.update(self.add_model(node.left)) node_tensors.update(self.add_model(node.right)) ### YOUR CODE HERE node_input = tf.concat(1, [node_tensors[node.left], node_tensors[node.right]]) curr_node_tensor = tf.matmul(node_input, W1) + b1 curr_node_tensor = tf.nn.relu(curr_node_tensor) ### END YOUR CODE node_tensors[node] = curr_node_tensor return node_tensors def add_projections(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope("Projection", reuse=True): U = tf.get_variable("U") bs = tf.get_variable("bs") multi = tf.matmul(node_tensors, U) logits = multi + bs ### END YOUR CODE return logits def loss(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ loss = None # YOUR CODE HERE cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels) cost = tf.reduce_sum(cost) with tf.variable_scope("Composition", reuse=True): W1 = tf.get_variable("W1") with tf.variable_scope("Projection", reuse=True): U = tf.get_variable("U") regularization = tf.nn.l2_loss(W1) + tf.nn.l2_loss(U) loss = cost + self.config.l2 * regularization #loss = cost + self.config.l2 * tf.nn.l2_loss(W1) # END YOUR CODE return loss def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. 
""" train_op = None # YOUR CODE HERE optim = tf.train.GradientDescentOptimizer(self.config.lr) #optim = tf.train.AdamOptimizer(0.003) train_op = optim.minimize(loss) # END YOUR CODE return train_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ predictions = None # YOUR CODE HERE yhat = tf.nn.softmax(y) predictions = tf.argmax(yhat, 1) #predictions = tf.Print(predictions,[yhat, predictions], summarize=30) # END YOUR CODE return predictions def __init__(self, config): self.config = config self.load_data() def predict(self, trees, weights_path, get_loss = False): """Make predictions from the provided model.""" results = [] losses = [] for i in xrange(int(math.ceil(len(trees)/float(RESET_AFTER)))): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() saver = tf.train.Saver() saver.restore(sess, weights_path) for tree in trees[i*RESET_AFTER: (i+1)*RESET_AFTER]: logits = self.inference(tree, True) predictions = self.predictions(logits) root_prediction = sess.run(predictions)[0] if get_loss: root_label = tree.root.label loss = sess.run(self.loss(logits, [root_label])) losses.append(loss) results.append(root_prediction) return results, losses def run_epoch(self, new_model = False, verbose=True): step = 0 loss_history = [] while step < len(self.train_data): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() if new_model: init = tf.initialize_all_variables() sess.run(init) new_model = False else: saver = tf.train.Saver() saver.restore(sess, './weights/%s.temp'%self.config.model_name) for _ in xrange(RESET_AFTER): if step>=len(self.train_data): break tree = self.train_data[step] logits = self.inference(tree) labels = [l for l in tree.labels if l!=2] loss = self.loss(logits, labels) train_op = self.training(loss) loss, _ = sess.run([loss, train_op]) loss_history.append(loss) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() step+=1 saver = tf.train.Saver() if not os.path.exists("./weights"): os.makedirs("./weights") saver.save(sess, './weights/%s.temp'%self.config.model_name) train_preds, _ = self.predict(self.train_data, './weights/%s.temp'%self.config.model_name) val_preds, val_losses = self.predict(self.dev_data, './weights/%s.temp'%self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print print 'Training acc (only root node): {}'.format(train_acc) print 'Valiation acc (only root node): {}'.format(val_acc) print self.make_conf(train_labels, train_preds) print self.make_conf(val_labels, val_preds) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, verbose=True): complete_loss_history = [] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') #best_val_loss = float('inf') best_val_acc = 0 best_val_epoch = 0 stopped = -1 for epoch in xrange(self.config.max_epochs): print 'epoch %d'%epoch if epoch==0: train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch() complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) #lr annealing epoch_loss = np.mean(loss_history) if 
epoch_loss>prev_epoch_loss*self.config.anneal_threshold: self.config.lr/=self.config.anneal_by print 'annealed lr to %f'%self.config.lr prev_epoch_loss = epoch_loss #save if model has improved on val print 'validation loss %f' % val_loss #if val_loss < best_val_loss: if val_acc > best_val_acc: shutil.copyfile('./weights/%s.temp'%self.config.model_name, './weights/%s'%self.config.model_name) #best_val_loss = val_loss best_val_acc = val_acc best_val_epoch = epoch # if model has not imprvoved for a while stop if epoch - best_val_epoch > self.config.early_stopping: stopped = epoch #break if verbose: sys.stdout.write('\r') sys.stdout.flush() print '\n\nstopped at %d\n'%stopped return { 'loss_history': complete_loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def make_conf(self, labels, predictions): confmat = np.zeros([2, 2]) for l,p in itertools.izip(labels, predictions): confmat[l, p] += 1 return confmat
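# The composition and projection steps used by add_model / add_projections in
# the class above, restated for a single tree node in plain numpy (standalone
# sketch, illustrative shapes only): the two child vectors are concatenated,
# passed through an affine layer with a ReLU, and the resulting phrase vector
# is projected to raw sentiment logits.
import numpy as np

def compose_node(h_left, h_right, W1, b1):
    """h_left, h_right: (1, embed_size); W1: (2*embed_size, embed_size); b1: (1, embed_size)."""
    node_input = np.concatenate([h_left, h_right], axis=1)   # (1, 2*embed_size)
    return np.maximum(0.0, node_input.dot(W1) + b1)          # ReLU, (1, embed_size)

def project_node(h, U, bs):
    """h: (1, embed_size); U: (embed_size, label_size); bs: (1, label_size)."""
    return h.dot(U) + bs                                     # raw sentiment logits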
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps), name='input') self.labels_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps), name='labels') self.dropout_placeholder = tf.placeholder(tf.float32, shape=(), name='dropout') ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE with tf.variable_scope("embedding"): L = tf.get_variable("L", (len(self.vocab), self.config.embed_size), initializer=xavier_weight_init()) input_wvs = tf.nn.embedding_lookup(L, self.input_placeholder) input_wvs = tf.split(input_wvs, self.config.num_steps, 1) inputs = [] for i in range(len(input_wvs)): inputs.append(tf.squeeze(input_wvs[i], squeeze_dims=[1])) ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). 
Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE with tf.variable_scope("projection"): U = tf.get_variable("U", (self.config.hidden_size, len(self.vocab)), initializer=xavier_weight_init()) b2 = tf.Variable(tf.zeros(len(self.vocab)), name='b2') outputs = [] for rnn_out in rnn_outputs: outputs.append(tf.matmul(rnn_out, U) + b2) ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE logits = output e = tf.expand_dims(input=output, axis=0) print(e) targets = self.labels_placeholder f = tf.expand_dims(input=tf.reshape(targets, [-1]), axis=0) print(targets) weights = tf.ones((self.config.batch_size * self.config.num_steps)) print(weights) g = tf.expand_dims(input=weights, axis=0) loss = sequence_loss(e, f, g) # all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])] # a = tf.reshape(self.labels_placeholder, [-1]) # b = tf.ones((self.config.batch_size * self.config.num_steps)) # d = [output] # c = sequence_loss(logits=output, targets=[a], weights=b) # cross_entropy = sequence_loss( # [output], [tf.reshape(self.labels_placeholder, [-1])], all_ones, len(self.vocab)) # tf.add_to_collection('total_loss', cross_entropy) # loss = tf.add_n(tf.get_collection('total_loss')) ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE train_op = tf.train.AdamOptimizer(self.config.lr).minimize(loss) ### END YOUR CODE return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [ tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs ] # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide output = tf.reshape(tf.concat(self.outputs, 1), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss) def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. 
Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE with tf.variable_scope('inputdropout'): inputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in inputs ] with tf.variable_scope("hidden"): H = tf.get_variable( "U", (self.config.hidden_size, self.config.hidden_size), initializer=xavier_weight_init()) I = tf.get_variable( "I", (self.config.embed_size, self.config.hidden_size), initializer=xavier_weight_init()) b1 = tf.Variable(tf.zeros(self.config.hidden_size), name='b1') self.initial_state = tf.zeros( (self.config.batch_size, self.config.hidden_size)) rnn_outputs = [self.initial_state] for i in range(self.config.num_steps): rnn_outputs.append( tf.nn.sigmoid( tf.matmul(inputs[i], I) + tf.matmul(rnn_outputs[i], H) + b1)) del rnn_outputs[0] self.final_state = rnn_outputs[-1] with tf.variable_scope('hiddendropout'): rnn_outputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in rnn_outputs ] ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum( 1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = { self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp } loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
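# Why run_epoch above returns np.exp(np.mean(total_loss)): sequence_loss gives
# the average per-token cross-entropy, and perplexity is the exponential of
# that average.  Plain numpy sketch with an illustrative toy distribution:
import numpy as np

probs_of_true_tokens = np.array([0.2, 0.5, 0.1])   # model prob of each correct next word
token_losses = -np.log(probs_of_true_tokens)       # per-token cross-entropy
perplexity = np.exp(np.mean(token_losses))
print 'perplexity:', perplexity                    # ~4.64 = 1 / geometric mean prob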
class Model(): def __init__(self, config): self.config = config self.load_data() self.build_model() def load_vocab(self,debug): self.vocab = Vocab() if debug: self.vocab.construct(get_words_dataset('dev')) else: self.vocab.construct(get_words_dataset('train')) self.vocab.build_embedding_matrix(self.config.word_embed_size) self.embedding_matrix = self.vocab.embedding_matrix def load_data(self, debug=False): """ Loads starter word-vectors and train/dev/test data. """ self.load_vocab(debug) config = self.config if debug: # Load the training set train_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'dev', 'post')) ( self.sent1_train, self.sent2_train, self.len1_train, self.len2_train, self.y_train ) = zip(*train_data) self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train) self.len1_train, self.len2_train = ( np.array(self.len1_train), np.array(self.len2_train) ) self.y_train = np.array(self.y_train) print('# training examples: %d' %len(self.y_train)) # Load the validation set dev_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_dev, self.sent2_dev, self.len1_dev, self.len2_dev, self.y_dev ) = zip(*dev_data) self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev) self.len1_dev, self.len2_dev = ( np.array(self.len1_dev), np.array(self.len2_dev) ) self.y_dev = np.array(self.y_dev) print('# dev examples: %d' %len(self.y_dev)) # Load the test set test_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_test, self.sent2_test, self.len1_test, self.len2_test, self.y_test ) = zip(*test_data) self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test) self.len1_test, self.len2_test = ( np.array(self.len1_test), np.array(self.len2_test) ) self.y_test = np.array(self.y_test) print('# test examples: %d' %len(self.y_test)) else: # Load the training set train_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'train', 'post')) ( self.sent1_train, self.sent2_train, self.len1_train, self.len2_train, self.y_train ) = zip(*train_data) self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train) self.len1_train, self.len2_train = ( np.array(self.len1_train), np.array(self.len2_train) ) self.y_train = np.array(self.y_train) print('# training examples: %d' %len(self.y_train)) # Load the validation set dev_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'dev', 'post')) ( self.sent1_dev, self.sent2_dev, self.len1_dev, self.len2_dev, self.y_dev ) = zip(*dev_data) self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev) self.len1_dev, self.len2_dev = ( np.array(self.len1_dev), np.array(self.len2_dev) ) self.y_dev = np.array(self.y_dev) print('# dev examples: %d' %len(self.y_dev)) # Load the test set test_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_test, self.sent2_test, self.len1_test, self.len2_test, self.y_test ) = zip(*test_data) self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test) self.len1_test, self.len2_test = ( np.array(self.len1_test), np.array(self.len2_test) ) self.y_test = np.array(self.y_test) print('# test examples: %d' %len(self.y_test)) print('min len: ', np.min(self.len2_train)) def build_model(self): config = self.config k = config.sentence_embed_size L = config.sent_len # input tensors self.sent1_ph = tf.placeholder(tf.int32, 
shape=[None, L], name='sent1') self.sent2_ph = tf.placeholder(tf.int32, shape=[None, L], name='sent2') self.len1_ph = tf.placeholder(tf.int32, shape=[None], name='len1') self.len2_ph = tf.placeholder(tf.int32, shape=[None], name='len2') self.labels_ph = tf.placeholder(tf.float32, shape=[None, config.label_size], name='label') self.kp_ph = tf.placeholder(tf.float32, name='kp') kp = self.kp_ph # set embedding matrix to pretrained embedding init_embeds = tf.constant(self.embedding_matrix, dtype='float32') word_embeddings = tf.get_variable( dtype='float32', name='word_embeddings', initializer=init_embeds, trainable=False) # no fine-tuning of word embeddings # x1 and x2 have shape (?, L, k) x1 = tf.nn.embedding_lookup(word_embeddings, self.sent1_ph) x2 = tf.nn.embedding_lookup(word_embeddings, self.sent2_ph) x1, x2 = tf.nn.dropout(x1, kp), tf.nn.dropout(x2, kp) # encode premise sentence with 1st LSTM with tf.variable_scope('rnn1'): cell1 = tf.contrib.rnn.LSTMCell(num_units=k, state_is_tuple=True) cell1 = tf.contrib.rnn.DropoutWrapper(cell1, input_keep_prob=kp, output_keep_prob=kp) out1, fstate1 = tf.nn.dynamic_rnn( cell=cell1, inputs=x1, sequence_length=self.len1_ph, dtype=tf.float32) # encode hypothesis with 2nd LSTM # using final state of 1st LSTM as initial state with tf.variable_scope('rnn2'): cell2 = tf.contrib.rnn.LSTMCell(num_units=k, state_is_tuple=True) cell2 = tf.contrib.rnn.DropoutWrapper(cell2, input_keep_prob=kp, output_keep_prob=kp) out2, fstate2 = tf.nn.dynamic_rnn( cell=cell2, inputs=x2, sequence_length=self.len2_ph, initial_state=fstate1, dtype=tf.float32) Y = out1 Y_mod =tf.reshape(Y, [-1, k]) W_y = tf.get_variable(name='W_y', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_h = tf.get_variable(name='W_h', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_M = tf.get_variable(name='b_M', initializer=tf.zeros([L, k])) W_r = tf.get_variable(name='W_r', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_t = tf.get_variable(name='W_t', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_r = tf.get_variable(name='b_r', initializer=tf.zeros([k])) w = tf.get_variable(name='w', shape=[k, 1], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_a = tf.get_variable(name='b_a', initializer=tf.zeros([L])) rt_1 = tf.zeros([tf.shape(self.len1_ph)[0], k]) attention = [] r_outputs = [] for t in range(L): ht = out2[:,t,:] Ht = tf.reshape(tf.tile(ht, [1, L]), [-1, L, k]) Ht_mod = tf.reshape(Ht, [-1, k]) Rt_1 = tf.reshape(tf.tile(rt_1, [1, L]), [-1, L, k]) Rt_1_mod = tf.reshape(Rt_1, [-1, k]) Mt = tf.nn.tanh( tf.reshape(tf.matmul(Y_mod, W_y), [-1, L, k]) + tf.reshape(tf.matmul(Ht_mod, W_h), [-1, L, k]) + tf.reshape(tf.matmul(Rt_1_mod, W_r), [-1, L, k]) ) Mt_w = tf.matmul(tf.reshape(Mt, [-1, k]), w) alphat = tf.nn.softmax(tf.reshape(Mt_w, [-1, 1, L]) ) alphat_Y = tf.reshape(tf.matmul(alphat, Y), [-1, k]) rt = alphat_Y + tf.nn.tanh(tf.matmul(rt_1, W_t) ) rt_1 = rt attention.append(alphat) r_outputs.append(rt) r_outputs = tf.stack(r_outputs) self.attention = tf.stack(attention) r_outputs = tf.transpose(r_outputs, [1, 0, 2]) def get_last_relevant_output(out, seq_len): rng = tf.range(0, tf.shape(seq_len)[0]) indx = tf.stack([rng, seq_len - 1], 1) last = tf.gather_nd(out, indx) return last rN = get_last_relevant_output(r_outputs, self.len2_ph) hN = get_last_relevant_output(out2, self.len2_ph) W_p = tf.get_variable(name='W_p', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_x = 
tf.get_variable(name='W_x', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_hs = tf.get_variable(name='b_hs', initializer=tf.zeros([k])) # sentence pair representation h_s = tf.nn.tanh(tf.matmul(rN, W_p) + tf.matmul(hN, W_x) ) y = h_s # MLP classifier on top hidden_sizes = config.hidden_sizes for layer, size in enumerate(hidden_sizes): if layer > 0: previous_size = hidden_sizes[layer-1] else: previous_size = k W = tf.get_variable(name='W{}'.format(layer), shape=[previous_size, size], initializer=tf.contrib.layers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b = tf.get_variable(name='b{}'.format(layer), initializer=tf.zeros([size])) y = tf.nn.relu(tf.matmul(y, W) + b) y = tf.nn.dropout(y, kp) W_softmax = tf.get_variable(name='W_softmax', shape=[hidden_sizes[-1], config.label_size], initializer=tf.contrib.layers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_softmax = tf.get_variable(name='b_softmax', initializer=tf.zeros([config.label_size])) logits = tf.matmul(y, W_softmax) + b_softmax cross_entropy_loss = tf.reduce_mean( tf.losses.softmax_cross_entropy(self.labels_ph, logits) ) reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) self.loss = cross_entropy_loss #+ tf.add_n(reg_losses) optimizer = tf.train.AdamOptimizer(learning_rate=config.lr) gradients, variables = zip(*optimizer.compute_gradients(self.loss)) gradients, _ = tf.clip_by_global_norm(gradients, config.max_grad_norm) self.train_op = optimizer.apply_gradients(zip(gradients, variables)) self.probs = tf.nn.softmax(logits) self.predictions = tf.argmax(self.probs, 1) correct_prediction = tf.equal( tf.argmax(self.labels_ph, 1), self.predictions) self.correct_predictions = tf.reduce_sum(tf.cast(correct_prediction, 'int32')) def create_feed_dict(self, sent1_batch, sent2_batch, len1_batch, len2_batch, label_batch, keep_prob): feed_dict = { self.sent1_ph: sent1_batch, self.sent2_ph: sent2_batch, self.len1_ph: len1_batch, self.len2_ph: len2_batch, self.labels_ph: label_batch, self.kp_ph: keep_prob } return feed_dict def run_epoch(self, session, sent1_data, sent2_data, len1_data, len2_data, input_labels, verbose=100): orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y = ( sent1_data, sent2_data, len1_data, len2_data, input_labels ) kp = self.config.kp total_loss = [] total_correct_examples = 0 total_processed_examples = 0 total_steps = int( orig_sent1.shape[0] / self.config.batch_size) for step, (sent1, sent2, len1, len2, y) in enumerate( data_iterator(orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y, batch_size=self.config.batch_size, label_size=self.config.label_size)): feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp) loss, total_correct, _ = session.run( [self.loss, self.correct_predictions, self.train_op], feed_dict=feed) total_processed_examples += len(y) total_correct_examples += total_correct total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : loss = {}'.format( step, total_steps, np.mean(total_loss))) sys.stdout.flush() if verbose: sys.stdout.write('\r') sys.stdout.flush() return np.mean(total_loss), total_correct_examples / float(total_processed_examples), total_loss def predict(self, session, sent1_data, sent2_data, len1_data, len2_data, y=None): """Make predictions from the provided model.""" # If y is given, the loss is also calculated # We deactivate dropout by setting it to 1 kp = 1.0 losses = [] results = [] if np.any(y): data = data_iterator(sent1_data, 
sent2_data, len1_data, len2_data, y, batch_size=self.config.batch_size, label_size=self.config.label_size, shuffle=False) else: data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, batch_size=self.config.batch_size, label_size=self.config.label_size, shuffle=False) for step, (sent1, sent2, len1, len2, y) in enumerate(data): feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp) if np.any(y): loss, preds = session.run( [self.loss, self.predictions], feed_dict=feed) losses.append(loss) else: preds = session.run(self.predictions, feed_dict=feed) results.extend(preds) return np.mean(losses), np.array(results) def get_attention(self, session, sent1, sent2): kp = 1.0 sent1 = utils.encode_sentence(self.vocab, sent1) print(sent1) sent2 = utils.encode_sentence(self.vocab, sent2) print(sent2) sent1 = utils.pad_sentence(self.vocab, sent1, self.config.sent_len, 'post') sent2 = utils.pad_sentence(self.vocab, sent2, self.config.sent_len, 'post') len1, len2 = np.array([len(sent1)]), np.array([len(sent2)]) sent1_arr = np.array(sent1).reshape((1,-1)) sent2_arr = np.array(sent2).reshape((1,-1)) y = np.array([0,1,0]).reshape((1,-1)) feed = self.create_feed_dict(sent1_arr, sent2_arr, len1, len2, y, kp) preds, alphas = session.run([self.predictions, self.attention], feed_dict=feed) return preds, alphas
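# One step of the word-by-word attention computed in build_model above, written
# for a single example in plain numpy (standalone sketch: Y is the (L, k) matrix
# of premise LSTM states, h_t and r_prev are (k,) vectors; the weight names
# mirror the graph variables, values are illustrative).
import numpy as np

def attention_step(Y, h_t, r_prev, W_y, W_h, W_r, W_t, w):
    M_t = np.tanh(Y.dot(W_y) + h_t.dot(W_h) + r_prev.dot(W_r))  # (L, k); h_t/r_prev broadcast over L
    scores = M_t.dot(w)                                          # (L,) unnormalized attention scores
    alpha_t = np.exp(scores - scores.max())
    alpha_t /= alpha_t.sum()                                      # softmax over premise words
    r_t = alpha_t.dot(Y) + np.tanh(r_prev.dot(W_t))               # (k,) attended representation
    return alpha_t, r_t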
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] # Load word vectors all_embeddings = np.loadtxt("data/ner/wordVectors.txt") all_words = np.genfromtxt("data/ner/vocab.txt",dtype='str') # L is the embedding matrix L = np.zeros((len(self.vocab), self.config.embed_size)) m = 0 for i in range(len(self.vocab)): word = self.vocab.index_to_word[i] index = np.where(all_words == word) if index[0].shape[0] == 0: m += 1 L[i,:] = all_embeddings[0,:] else: index = np.asscalar(index[0]) L[i,:] = all_embeddings[index,:] self.L = tf.constant(L, dtype = tf.float32) def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder(tf.int32, shape = [None, self.config.num_steps], name="input_placeholder") self.labels_placeholder = tf.placeholder(tf.int32, shape = [None, self.config.num_steps], name="labels_placeholder") self.dropout_placeholder = tf.placeholder(tf.float32, shape = (), name="dropout_placeholder") ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ with tf.device('/cpu:0'): ### YOUR CODE HERE inputs = [] split_inputs = tf.split(self.input_placeholder, self.config.num_steps, axis=1) for i in range(self.config.num_steps): a_input = tf.cast(tf.one_hot(tf.squeeze(split_inputs[i], axis=1), len(self.vocab)), tf.float32) inputs.append(tf.matmul(a_input,self.L)) ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. 
Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE outputs = [] with tf.variable_scope("RNN",reuse=tf.AUTO_REUSE): self.U = tf.get_variable("U",shape=[self.config.hidden_size,len(self.vocab)],\ initializer=tf.contrib.layers.xavier_initializer()) self.b2 = tf.get_variable("b2",shape=[len(self.vocab)],\ initializer=tf.contrib.layers.xavier_initializer()) for i in range(self.config.num_steps): output = tf.matmul(rnn_outputs[i], self.U) + self.b2 outputs.append(output) ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE return sequence_loss(output, self.labels_placeholder,\ weights=tf.ones([self.config.batch_size, self.config.num_steps]),\ average_across_timesteps=False,average_across_batch=False) ### END YOUR CODE def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ with tf.variable_scope("loss",reuse = tf.AUTO_REUSE): train_op = tf.train.AdamOptimizer(self.config.lr).minimize(loss) return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() # self.rnn_outputs = self.add_model(self.inputs) # self.rnn_outputs = self.add_model_GRU(self.inputs) self.rnn_outputs = self.add_model_LSTM(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs] # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide output = tf.stack(self.outputs, 1) self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss) def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. You can use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. 
You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE # Create parameters for RNN self.initial_state = tf.zeros([self.config.batch_size,self.config.hidden_size]) with tf.variable_scope("RNN",reuse = tf.AUTO_REUSE): # TODO: try GRU or LSTM self.H = tf.get_variable("H",shape=[self.config.hidden_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.I = tf.get_variable("I",shape=[self.config.embed_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.b1 = tf.get_variable("b1",shape=[self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) # Compute RNN outputs rnn_outputs = [] h = self.initial_state for i in range(self.config.num_steps): curr_input = inputs[i] curr_input = tf.nn.dropout(curr_input,keep_prob=self.dropout_placeholder) h = tf.nn.dropout(tf.nn.sigmoid(tf.matmul(h, self.H) + tf.matmul(curr_input, self.I) + self.b1)\ ,keep_prob=self.dropout_placeholder) rnn_outputs.append(h) self.final_state = h ### END YOUR CODE return rnn_outputs def add_model_GRU(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. You can use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). 
Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE # Create parameters for RNN self.initial_state = tf.zeros([self.config.batch_size,self.config.hidden_size]) with tf.variable_scope("GRU",reuse = tf.AUTO_REUSE): # TODO: try GRU or LSTM self.Wu_h = tf.get_variable("Wu_h",shape=[self.config.hidden_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Wu_x = tf.get_variable("Wu_x",shape=[self.config.embed_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.bu = tf.get_variable("bu",shape=[self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Wr_h = tf.get_variable("Wr_h",shape=[self.config.hidden_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Wr_x = tf.get_variable("Wr_x",shape=[self.config.embed_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.br = tf.get_variable("br",shape=[self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Wc_h = tf.get_variable("Wc_h",shape=[self.config.hidden_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Wc_x = tf.get_variable("Wc_x",shape=[self.config.embed_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.bc = tf.get_variable("bc",shape=[self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) # Compute RNN outputs rnn_outputs = [] h = self.initial_state for i in range(self.config.num_steps): curr_input = inputs[i] # Apply drop out curr_input = tf.nn.dropout(curr_input,keep_prob=self.dropout_placeholder) # h = tf.nn.dropout(tf.nn.sigmoid(tf.matmul(h, self.H) + tf.matmul(curr_input, self.I) + self.b1)\ # ,keep_prob=self.dropout_placeholder) u = tf.nn.sigmoid(tf.matmul(h, self.Wu_h) + tf.matmul(curr_input, self.Wu_x) + self.bu) r = tf.nn.sigmoid(tf.matmul(h, self.Wr_h) + tf.matmul(curr_input, self.Wr_x) + self.br) c = tf.nn.tanh(tf.matmul(tf.multiply(r, h), self.Wc_h) + tf.matmul(curr_input, self.Wc_x) + self.bc) h = tf.multiply(u, h) + tf.multiply(tf.ones_like(u) - u, c) rnn_outputs.append(h) self.final_state = h ### END YOUR CODE return rnn_outputs def add_model_LSTM(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. You can use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). 
Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE # Create parameters for RNN self.initial_state = tf.zeros([self.config.batch_size,self.config.hidden_size]) with tf.variable_scope("LSTM",reuse = tf.AUTO_REUSE): # TODO: try GRU or LSTM # u-gate self.Wu = tf.get_variable("Wu",shape=[self.config.hidden_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Uu = tf.get_variable("Uu",shape=[self.config.embed_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.bu = tf.get_variable("bu",shape=[self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) # Output gate self.Wo = tf.get_variable("Wo",shape=[self.config.hidden_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Uo = tf.get_variable("Uo",shape=[self.config.embed_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.bo = tf.get_variable("bo",shape=[self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) # Forget gate self.Wf = tf.get_variable("Wf",shape=[self.config.hidden_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Uf = tf.get_variable("Uf",shape=[self.config.embed_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.bf = tf.get_variable("bf",shape=[self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) # Relevance gate self.Wr = tf.get_variable("Wr",shape=[self.config.hidden_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Ur = tf.get_variable("Ur",shape=[self.config.embed_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.br = tf.get_variable("br",shape=[self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) # Control gate self.Wc = tf.get_variable("Wc",shape=[self.config.hidden_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.Uc = tf.get_variable("Uc",shape=[self.config.embed_size,self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) self.bc = tf.get_variable("bc",shape=[self.config.hidden_size],\ initializer=tf.contrib.layers.xavier_initializer()) # Compute RNN outputs rnn_outputs = [] h = self.initial_state for i in range(self.config.num_steps): curr_input = inputs[i] # Apply drop out curr_input = tf.nn.dropout(curr_input,keep_prob=self.dropout_placeholder) u = tf.nn.sigmoid(tf.matmul(h, self.Wu) + tf.matmul(curr_input, self.Uu) + self.bu) r = tf.nn.sigmoid(tf.matmul(h, self.Wr) + tf.matmul(curr_input, self.Ur) + self.br) f = tf.nn.sigmoid(tf.matmul(h, self.Wf) + tf.matmul(curr_input, self.Uf) + self.bf) o = tf.nn.sigmoid(tf.matmul(h, self.Wo) + tf.matmul(curr_input, self.Uo) + self.bo) c_tilde = tf.nn.tanh(tf.matmul(tf.multiply(r, h), self.Wc) + tf.matmul(curr_input, self.Uc) + self.bc) c = tf.multiply(u, c_tilde) + tf.multiply(f, h) h = tf.multiply(o, c) rnn_outputs.append(h) self.final_state = h ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, 
config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = {self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp} loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
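# --- Illustrative sketch (not part of the model above) ---
# A small NumPy sketch of the single GRU step that add_model_GRU above unrolls over
# num_steps: update gate u, reset gate r, candidate c, and h_new = u * h + (1 - u) * c.
# Matrix shapes follow the TF variables (hidden x hidden for the W*_h matrices,
# embed x hidden for the W*_x matrices); the sizes below are illustrative assumptions.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gru_step(h, x, Wu_h, Wu_x, bu, Wr_h, Wr_x, br, Wc_h, Wc_x, bc):
    u = sigmoid(h @ Wu_h + x @ Wu_x + bu)         # update gate
    r = sigmoid(h @ Wr_h + x @ Wr_x + br)         # reset gate
    c = np.tanh((r * h) @ Wc_h + x @ Wc_x + bc)   # candidate hidden state
    return u * h + (1.0 - u) * c                  # new hidden state

if __name__ == "__main__":
    batch, embed, hidden = 2, 5, 3                # assumed sizes
    rng = np.random.RandomState(0)
    h = np.zeros((batch, hidden))
    x = rng.randn(batch, embed)

    def gate_params():                            # one (W_h, W_x, b) triple per gate
        return rng.randn(hidden, hidden), rng.randn(embed, hidden), np.zeros(hidden)

    Wu_h, Wu_x, bu = gate_params()
    Wr_h, Wr_x, br = gate_params()
    Wc_h, Wc_x, bc = gate_params()
    print(gru_step(h, x, Wu_h, Wu_x, bu, Wr_h, Wr_x, br, Wc_h, Wc_x, bc).shape)  # (2, 3)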
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps)) self.labels_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps)) self.dropout_placeholder = tf.placeholder(tf.float32, shape=None) ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE L = tf.Variable(tf.random_uniform([len(self.vocab), self.config.embed_size], -1.0, 1.0), name="L") # Shape of input_placeholder : (batch_size, num_steps) # Shape of embed : (num_steps, batch_size, embed_size) embed = tf.nn.embedding_lookup(L, tf.transpose(self.input_placeholder, perm=[1,0])) inputs = [tf.squeeze(ts, [0]) for ts in tf.split(0, self.config.num_steps, embed)] ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size(LIBIN edited)). 
Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab)) """ ### YOUR CODE HERE with tf.variable_scope("projection", initializer = xavier_weight_init(), reuse=None): U = tf.get_variable("U", shape=(self.config.hidden_size, len(self.vocab))) b2 = tf.get_variable("b2", shape=(len(self.vocab), )) outputs = [tf.matmul(ts, U) + b2 for ts in rnn_outputs] ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Check https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py Args: output: A tensor of shape (None, self.vocab) (LIBIN : not used) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE # output shape : [num_steps * (batch_size, len(self.vocab))] # targets shape : [num_steps * (batch_size, )] # weights shape : [num_steps * (batch_size, )] targets = [tf.squeeze(ts,[1]) for ts in tf.split(1, self.config.num_steps, self.labels_placeholder)] weights = [tf.ones((self.config.batch_size, )) for step in xrange(self.config.num_steps)] loss = sequence_loss(output, targets, weights) ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False) train_op = optimizer.minimize(loss) ### END YOUR CODE return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) #print self.outputs #print tf.concat(1, self.outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs] # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide # Libin : output not used output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)]) # output is a single long sequence tensor concatenated # orderly by all short squences in current batch. # Each element in output is a tensor of size self.vocab which gives the probability # distribution of current word #print output #raw_input() self.calculate_loss = self.add_loss_op(self.outputs) self.train_step = self.add_training_op(self.calculate_loss) def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. 
Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE rnn_outputs = [] self.initial_state = tf.zeros([self.config.batch_size, self.config.hidden_size]) with tf.variable_scope("RNN", initializer=xavier_weight_init(), reuse=None): H = tf.get_variable("H", shape=(self.config.hidden_size, self.config.hidden_size)) I = tf.get_variable("I", shape=(self.config.embed_size, self.config.hidden_size)) b1 = tf.get_variable("b1", shape=(self.config.hidden_size, )) prev_h = self.initial_state for step_input in inputs: step_input = tf.nn.dropout(step_input, self.dropout_placeholder) prev_h = tf.sigmoid(tf.matmul(prev_h, H) + tf.matmul(step_input, I) + b1) #prev_h = tf.nn.dropout(prev_h, self.dropout_placeholder) rnn_outputs.append(prev_h) self.final_state = prev_h ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = {self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp} loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: # The derivation of pp can be checked in question Q3-(a) sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
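# --- Illustrative sketch (not part of the model above) ---
# A NumPy sketch of what the list-based sequence_loss above computes: average
# per-token cross-entropy over all (batch, step) positions with uniform weights.
# run_epoch then reports perplexity as exp(mean loss). Sizes are illustrative.
import numpy as np

def sequence_xent(logits, targets):
    """logits: (batch, steps, vocab) scores; targets: (batch, steps) int indices."""
    shifted = logits - logits.max(axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
    b, s = targets.shape
    picked = log_probs[np.arange(b)[:, None], np.arange(s)[None, :], targets]
    return -picked.mean()                         # mean cross-entropy per token

if __name__ == "__main__":
    rng = np.random.RandomState(0)
    logits = rng.randn(4, 10, 50)                 # batch=4, num_steps=10, vocab=50
    targets = rng.randint(0, 50, size=(4, 10))
    loss = sequence_xent(logits, targets)
    print(loss, np.exp(loss))                     # loss and the corresponding perplexity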
""" self.node_list = [] root_node = self.walk_tree(x.root) all_nodes = torch.cat(self.node_list) #now I need to project out return all_nodes def main(): print("do nothing") if __name__ == '__main__': train_data, dev_data, test_data = tr.simplified_data(train_size, 100, 200) vocab = Vocab() train_sents = [t.get_words() for t in train_data] vocab.construct(list(itertools.chain.from_iterable(train_sents))) model = RNN_Model(vocab, embed_size=50) main() lr = 0.01 loss_history = [] optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0.0) # params (iterable): iterable of parameters to optimize or dicts defining # parameter groups # lr (float): learning rate # momentum (float, optional): momentum factor (default: 0) # weight_decay (float, optional): weight decay (L2 penalty) (default: 0) #torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, dampening=0, weight_decay=0) # print(model.fcl._parameters['weight']) for epoch in range(max_epochs):
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] print "vocab size ", len(self.vocab) print "training samples ", len(self.encoded_train) def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type #TODO: wrong tf.float32 --> should be tf.int32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE #raise NotImplementedError self.input_placeholder = tf.placeholder(name="inputs", dtype=tf.int32, shape=(None, self.config.num_steps)) self.labels_placeholder = tf.placeholder(name="outputs", dtype=tf.int32, shape=(None, self.config.num_steps)) self.dropout_placeholder = tf.placeholder(name="dropout", dtype=tf.float32) ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. ----> From the slides train_inputs = tf.placeholder(tf.int32, shape=[batch_size]) train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1]) embeddings = tf.Variable(tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0)) embed = tf.nn.embedding_lookup(embeddings, train_input) <----- Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE #current mini batch #for t in range(self.config.num_steps): # t = 0 to n time steps (processed at once) # no need! understand tf.nn.embedding_lookup and you will know why! 
with tf.variable_scope('embed') as embed_scope: embeddings = tf.get_variable("embeddings", shape=[len(self.vocab), self.config.embed_size], initializer=xavier_weight_init()) embedded_inputs = tf.nn.embedding_lookup(params=embeddings, ids=tf.transpose(self.input_placeholder)) #this miniB print "embedded look up -> ", embedded_inputs.get_shape() # dims are embedded look up -> (10, ?, 50) embedded_inputs = tf.split(embedded_inputs, self.config.num_steps, axis=0) # each 'cell' in the RNN print "split gives ", len(embedded_inputs) , " entries of dim ", embedded_inputs[0].shape #inputs = embedded_inputs #print "split embedded look up -> ", len(embedded_inputs) , " ", embedded_inputs[0].shape inputs = map(lambda x: tf.squeeze (x, axis=[0]), embedded_inputs) #get rid of the ? dim above #print "inputs after tf.squeeze are: ", inputs #raise NotImplementedError ### END YOUR CODE print "at the end of embedLayer ", len(inputs), " ", inputs[0].shape return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). wrong!!!! hidden_size not embed_size Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE #raise NotImplementedError #this is W_hy print "rnn_outputs dims ", rnn_outputs[0].shape U = tf.get_variable("U", shape=[self.config.hidden_size, len(self.vocab)], initializer=xavier_weight_init()) b_2 = tf.get_variable("b_2", shape=[len(self.vocab)], initializer=xavier_weight_init()) outputs = [] for t_step in range(len(rnn_outputs)): outputs.append(tf.matmul(tf.nn.dropout(rnn_outputs[t_step], self.dropout_placeholder), U) + b_2) ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE b_size = self.config.batch_size n_steps = self.config.num_steps targets = [tf.reshape(self.labels_placeholder, [-1])] weights = [tf.ones([b_size*n_steps])] print "\n\nLoss Op: " print "logits ", len(output), " - ", output[0].shape t = tf.reshape(self.labels_placeholder, [b_size, n_steps]) print "labels ", t #print "weights ", w = tf.ones([b_size, n_steps]) print "weights ", w f = tf.reshape(output, [b_size, n_steps, len(self.vocab)]) print "reshaped ", f s2s_loss = sequence_loss(logits=f, targets=t, weights=w) self.sMax = tf.nn.softmax(f) print "smax ", self.sMax tf.add_to_collection('total_loss', s2s_loss) loss = s2s_loss print loss #raise NotImplementedError ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. 
""" ### YOUR CODE HERE optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr) train_op = optimizer.minimize(loss) #raise NotImplementedError ### END YOUR CODE return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) print 'outputs shape: ', self.outputs[0].shape print len(self.outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs] #print "0th pred ", self.predictions[0] #print "vocab ", self.vocab # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide #outputs = tf.reshape(tf.concat(self.outputs, 1), [-1, len(self.vocab)]) #self.calculate_loss = self.add_loss_op(self.outputs) #TODO: API changed for seq to seq loss! #outputs = tf.reshape(tf.concat(self.outputs, 1), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_op(self.outputs) self.train_step = self.add_training_op(self.calculate_loss) def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). 
Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE #raise NotImplementedError num_steps = len(inputs) b_size, e_size = self.config.batch_size, self.config.embed_size print num_steps , " steps " print b_size, " seq " print e_size, "embeddings" h_size = self.config.hidden_size self.initial_state = tf.zeros(shape=(b_size, h_size)) embeddings = tf.get_collection('embeddings', 'embed_scope') rnn_outputs = [] with tf.variable_scope('RNN') as scope: #scope.reuse_variables() H = tf.get_variable(name='H', dtype=tf.float32, shape=[h_size, h_size]) I = tf.get_variable(name='I',dtype=tf.float32, shape=[e_size, h_size]) b_1 = tf.get_variable(name='b_1', dtype=tf.float32, shape=[h_size]) #at t = 0 rnn_outputs.append(tf.sigmoid( tf.matmul(self.initial_state, H) + tf.matmul(inputs[0], I) + b_1) ) #remaining for i in range(1, num_steps): rnn_outputs.append(tf.sigmoid( tf.matmul(rnn_outputs[i-1], H) + tf.matmul(inputs[i], I) + b_1 )) self.final_state = rnn_outputs[num_steps-1] ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = {self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp} loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
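# --- Illustrative sketch (not part of the model above) ---
# A NumPy sketch of the unrolled vanilla RNN language model defined in add_model /
# add_projection above: h_t = sigmoid(h_{t-1} H + x_t I + b_1) followed by
# logits_t = dropout(h_t) U + b_2. Dropout is omitted here and all sizes are
# illustrative assumptions.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def rnn_lm_forward(inputs, H, I, b1, U, b2):
    """inputs: list of num_steps arrays of shape (batch, embed). Returns a logits list."""
    batch = inputs[0].shape[0]
    h = np.zeros((batch, H.shape[0]))             # initial state of zeros
    logits = []
    for x in inputs:
        h = sigmoid(h @ H + x @ I + b1)           # recurrent update
        logits.append(h @ U + b2)                 # projection to vocabulary scores
    return logits

if __name__ == "__main__":
    rng = np.random.RandomState(0)
    batch, embed, hidden, vocab, steps = 2, 5, 3, 7, 4
    inputs = [rng.randn(batch, embed) for _ in range(steps)]
    out = rnn_lm_forward(inputs, rng.randn(hidden, hidden), rng.randn(embed, hidden),
                         np.zeros(hidden), rng.randn(hidden, vocab), np.zeros(vocab))
    print(len(out), out[0].shape)                 # 4 (2, 7)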
class RNN_Model(): def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def inference(self, tree, predict_only_root=False): """For a given tree build the RNN models computation graph up to where it may be used for inference. Args: tree: a Tree object on which to build the computation graph for the RNN Returns: softmax_linear: Output tensor with the computed logits. """ node_tensors = self.add_model(tree.root) if predict_only_root: node_tensors = node_tensors[tree.root] else: node_tensors = [tensor for node, tensor in node_tensors.iteritems() if node.label!=2] node_tensors = tf.concat(0, node_tensors) return self.add_projections(node_tensors) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' embed_size = self.config.embed_size vocab_size = len(self.vocab) output_size = self.config.label_size with tf.variable_scope('Composition'): ### YOUR CODE HERE embedding = tf.get_variable("embedding", shape=(vocab_size, embed_size)) W1 = tf.get_variable("W1", shape=(2 * embed_size, embed_size)) b1 = tf.get_variable("b1", shape=(1, embed_size)) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE U = tf.get_variable("U", shape=(embed_size, output_size)) bs = tf.get_variable("bs", shape=(1, output_size)) ### END YOUR CODE self.optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr) # dummy_total is a simple sum to ensure that the variables for the AdamOptimizer # are created for initialization and before restore the variables later. # It should never actually get executed. dummy_total = tf.constant(0.0) for v in tf.trainable_variables(): dummy_total +=tf.reduce_sum(v) self.dummy_minimizer = self.optimizer.minimize(dummy_total) # we then initialize variables, and because of the self.dummy_minimizer, # all of the necessary variable/slot pairs get added and included in the # saver variables def add_model(self, node): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here --Hint: Store a node's vector representation in node.tensor so it can be used by it's parent-- Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). 
Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ with tf.variable_scope('Composition', reuse=True): ### YOUR CODE HERE embedding = tf.get_variable("embedding") W1 = tf.get_variable("W1") b1 = tf.get_variable("b1") ### END YOUR CODE # THOUGHT: Batch together all leaf nodes and all non leaf nodes node_tensors = OrderedDict() curr_node_tensor = None if node.isLeaf: ### YOUR CODE HERE curr_node_tensor = tf.gather(embedding, tf.constant([node.label]), name="leaf_lookup") ### END YOUR CODE else: node_tensors.update(self.add_model(node.left)) node_tensors.update(self.add_model(node.right)) ### YOUR CODE HERE left = node_tensors[node.left] right = node_tensors[node.right] concat = tf.concat(1, [left, right]) composition = tf.matmul(concat, W1) + b1 # TODO save on number of zero tensors... curr_node_tensor = tf.maximum(composition, tf.zeros_like(composition)) ### END YOUR CODE node_tensors[node] = curr_node_tensor return node_tensors def add_projections(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope('Projection', reuse=True): U = tf.get_variable("U") bs = tf.get_variable("bs") # NOTE: tf.add supports Broadcast logits = tf.matmul(node_tensors, U) + bs ### END YOUR CODE return logits def loss(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ loss = None # YOUR CODE HERE labels = tf.convert_to_tensor(labels, dtype=tf.int64) softmax_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels) l2 = self.config.l2 with tf.variable_scope('Composition', reuse=True): W1 = tf.get_variable("W1") with tf.variable_scope('Projection', reuse=True): U = tf.get_variable("U") l2_loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(U) l2_loss *= l2 loss = tf.reduce_sum(softmax_loss) + l2_loss # END YOUR CODE return loss def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. 
""" train_op = None # YOUR CODE HERE train_op = self.optimizer.minimize(loss) # END YOUR CODE return train_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ predictions = None # YOUR CODE HERE # pick max of softmax predictions in each batch predictions = tf.argmax(tf.nn.softmax(tf.cast(y, tf.float64)), dimension=1) # END YOUR CODE return predictions def __init__(self, config): self.config = config self.load_data() def predict(self, trees, weights_path, get_loss = False): """Make predictions from the provided model.""" results = [] losses = [] for i in xrange(int(math.ceil(len(trees)/float(RESET_AFTER)))): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() saver = tf.train.Saver() saver.restore(sess, weights_path) for tree in trees[i*RESET_AFTER: (i+1)*RESET_AFTER]: logits = self.inference(tree, True) predictions = self.predictions(logits) root_prediction = sess.run(predictions)[0] if get_loss: root_label = tree.root.label loss = sess.run(self.loss(logits, [root_label])) losses.append(loss) results.append(root_prediction) return results, losses def run_epoch(self, new_model = False, verbose=True): step = 0 loss_history = [] while step < len(self.train_data): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() if new_model: init = tf.initialize_all_variables() sess.run(init) new_model = False else: saver = tf.train.Saver() #saver.restore(sess, './weights/%s.temp'%self.config.model_name) saver.restore(sess, './weights_l2/%s.temp'%self.config.model_name) for _ in xrange(RESET_AFTER): if step>=len(self.train_data): break tree = self.train_data[step] logits = self.inference(tree) labels = [l for l in tree.labels if l!=2] loss = self.loss(logits, labels) train_op = self.training(loss) loss, _ = sess.run([loss, train_op]) loss_history.append(loss) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() step+=1 saver = tf.train.Saver() if not os.path.exists("./weights_l2"): os.makedirs("./weights_l2") saver.save(sess, './weights_l2/%s.temp'%self.config.model_name, write_meta_graph=False) train_preds, _ = self.predict(self.train_data, './weights_l2/%s.temp'%self.config.model_name) val_preds, val_losses = self.predict(self.dev_data, './weights_l2/%s.temp'%self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print print 'Training acc (only root node): {}'.format(train_acc) print 'Valiation acc (only root node): {}'.format(val_acc) print self.make_conf(train_labels, train_preds) print self.make_conf(val_labels, val_preds) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, verbose=True): complete_loss_history = [] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') best_val_loss = float('inf') best_val_epoch = 0 stopped = -1 for epoch in xrange(self.config.max_epochs): print 'epoch %d'%epoch if epoch==0: train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch() complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) #lr annealing epoch_loss = np.mean(loss_history) if 
epoch_loss>prev_epoch_loss*self.config.anneal_threshold: self.config.lr/=self.config.anneal_by print 'annealed lr to %f'%self.config.lr prev_epoch_loss = epoch_loss #save if model has improved on val if val_loss < best_val_loss: shutil.copyfile('./weights_l2/%s.temp'%self.config.model_name, './weights_l2/%s'%self.config.model_name) best_val_loss = val_loss best_val_epoch = epoch # if model has not improved for a while stop if epoch - best_val_epoch > self.config.early_stopping: stopped = epoch #break if verbose: sys.stdout.write('\r') sys.stdout.flush() print '\n\nstopped at %d\n'%stopped writeToResults('%s,%s,%s,%s,%s'%(self.config.model_name,stopped,complete_loss_history[-1],train_acc_history[-1],val_acc_history[-1])) return { 'loss_history': complete_loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def make_conf(self, labels, predictions): confmat = np.zeros([2, 2]) for l,p in itertools.izip(labels, predictions): confmat[l, p] += 1 return confmat
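# --- Illustrative sketch (not part of the model above) ---
# A NumPy sketch of the recursive composition this RNN_Model builds per tree: leaves
# look up an embedding row, internal nodes apply ReLU([left; right] W1 + b1), and the
# projection U, bs turns each node vector into label scores. The tiny tree and sizes
# below are illustrative assumptions, not data from the model above.
import numpy as np

def compose(node, embedding, W1, b1):
    """node is either ('leaf', word_id) or ('node', left, right); returns a (1, d) array."""
    if node[0] == 'leaf':
        return embedding[node[1]][None, :]                  # leaf: its word vector
    left = compose(node[1], embedding, W1, b1)
    right = compose(node[2], embedding, W1, b1)
    h = np.concatenate([left, right], axis=1) @ W1 + b1     # compose the two children
    return np.maximum(h, 0.0)                               # ReLU

if __name__ == "__main__":
    rng = np.random.RandomState(0)
    d, vocab, labels = 4, 6, 2
    embedding = rng.randn(vocab, d)
    W1, b1 = rng.randn(2 * d, d), np.zeros((1, d))
    U, bs = rng.randn(d, labels), np.zeros((1, labels))
    tree = ('node', ('leaf', 0), ('node', ('leaf', 1), ('leaf', 2)))
    root = compose(tree, embedding, W1, b1)
    print(root @ U + bs)                                    # root sentiment logits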
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 !!!WARN=>int32!!! dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder( tf.int32, shape=[None, self.config.num_steps], name='Input') self.labels_placeholder = tf.placeholder( tf.int32, shape=[None, self.config.num_steps], name='Labels') self.dropout_placeholder = tf.placeholder(tf.float32, name='Dropout') ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU #with tf.device('/cpu:0'): with tf.device('/gpu:0'): ### YOUR CODE HERE embedding = tf.get_variable( 'Embedding', [len(self.vocab), self.config.embed_size]) embed = tf.nn.embedding_lookup(embedding, self.input_placeholder) #inputs = [tf.squeeze(i) for i in tf.split(1, self.config.num_steps, embed)] ###unpack a Tensor of shape [n, m, o] as the sequence, into a sequence of n-tensors, each with shape [m, o]. ###http://r2rt.com/recurrent-neural-networks-in-tensorflow-ii.html inputs = tf.unpack(embed, axis=1) ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). 
Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE with tf.variable_scope("Layer"): U = tf.get_variable('U', [self.config.hidden_size, len(self.vocab)]) b_2 = tf.get_variable('b_2', [len(self.vocab)]) outputs = [tf.matmul(out, U) + b_2 for out in rnn_outputs] ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE loss_init = [tf.ones([self.config.batch_size * self.config.num_steps])] reshaped_labels = tf.reshape( self.labels_placeholder, [self.config.batch_size * self.config.num_steps, -1]) cross_entropy = sequence_loss([output], [reshaped_labels], loss_init, len(self.vocab)) #add cross_entropy (loss between pred and labels) tf.add_to_collection("total_loss", cross_entropy) #tf.get_collection(name, scope=None) : Returns a list of values in the collection with the given name loss = tf.add_n(tf.get_collection("total_loss")) ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE optimizer = tf.train.AdamOptimizer(self.config.lr) train_op = optimizer.minimize(loss) ### END YOUR CODE return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [ tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs ] # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss) def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) 
Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE #The memory state of the network is initialized with a vector of zeros #and gets updated after reading each word (https://www.tensorflow.org/tutorials/recurrent/) self.initial_state = tf.ones( [self.config.batch_size, self.config.hidden_size]) with tf.variable_scope("RNN") as scope: H = tf.get_variable( 'H', [self.config.hidden_size, self.config.hidden_size]) I = tf.get_variable( 'I', [self.config.embed_size, self.config.hidden_size]) b_1 = tf.get_variable('b_1', [self.config.hidden_size]) state = self.initial_state rnn_outputs = [] for input in inputs: input_dropout = tf.nn.dropout(input, self.dropout_placeholder) state = tf.sigmoid( tf.matmul(state, H) + tf.matmul(input_dropout, I) + b_1) output_dropout = tf.nn.dropout(state, self.dropout_placeholder) rnn_outputs.append(output_dropout) self.final_state = rnn_outputs[-1] ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum( 1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = { self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp } loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
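# --- Illustrative sketch (not part of the model above) ---
# A NumPy sketch of what add_embedding above produces: index into an embedding matrix
# with an int batch of shape (batch, num_steps), then unpack along the step axis into
# a list of num_steps arrays of shape (batch, embed_size). Sizes are illustrative.
import numpy as np

def embed_and_unpack(L, input_ids):
    """L: (vocab, embed) embedding matrix; input_ids: (batch, num_steps) int indices."""
    embedded = L[input_ids]                       # (batch, num_steps, embed)
    return [embedded[:, t, :] for t in range(embedded.shape[1])]

if __name__ == "__main__":
    rng = np.random.RandomState(0)
    L = rng.randn(10, 4)                          # vocab=10, embed_size=4
    ids = rng.randint(0, 10, size=(3, 5))         # batch=3, num_steps=5
    inputs = embed_and_unpack(L, ids)
    print(len(inputs), inputs[0].shape)           # 5 (3, 4)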
class RNN_Model(): def __init__(self, config): self.config = config self.load_data() self.merged_summaries = None self.summary_writer = None self.is_a_leaf = tf.placeholder(tf.bool, [None], name="is_a_leaf") self.left_child = tf.placeholder(tf.int32, [None], name="lchild") self.right_child = tf.placeholder(tf.int32, [None], name="rchild") self.word_index = tf.placeholder(tf.int32, [None], name="word_index") self.labelholder = tf.placeholder(tf.int32, [None], name="labels_holder") self.learning_rate = tf.placeholder(tf.float32, (), name="learning_rate") self.l2_reg = tf.placeholder(tf.float32, (), name="l2_regularization_weight") self.add_model_vars() # tensor array stores the vectors (embedded or composed) self.tensor_array_op = None self.prediction = None self.logits = None self.root_logits = None self.root_predict = None self.saver = None self.best_saver = None self.root_loss = None self.full_loss = None self.training_op = None tf.add_to_collection('hyperparameters/lr', self.config.lr) tf.add_to_collection('hyperparameters/l2', self.config.l2) tf.add_to_collection('hyperparameters/embed_size', self.config.embed_size) tf.add_to_collection('hyperparameters/label_size', self.config.label_size) # tensor_array_op is the operation on the TensorArray # private functions used to construct the graph. def _embed_word(self, word_index): with tf.variable_scope("Composition", reuse=True) as scope: embedding = tf.get_variable("embedding") return tf.expand_dims(tf.gather(embedding, word_index), 0) # private functions used to construct the graph. def _combine_children(self, tensor_concat, left_idx, right_idx): left_tensor = tf.expand_dims(tf.gather(tensor_concat, left_idx), 0) right_tensor = tf.expand_dims(tf.gather(tensor_concat, right_idx), 0) with tf.variable_scope('Composition', reuse=True): W1 = tf.get_variable('W1') b1 = tf.get_variable('b1') return tf.nn.relu( tf.matmul(tf.concat([left_tensor, right_tensor], 1), W1) + b1) def _loop_over_tree(self, i, tensor_list): is_leaf = tf.gather(self.is_a_leaf, i) word_idx = tf.gather(self.word_index, i) left_child = tf.gather(self.left_child, i) right_child = tf.gather(self.right_child, i) node_tensor = tf.cond( is_leaf, lambda: self._embed_word(word_idx), lambda: self._combine_children( tensor_list, left_child, right_child)) tensor_list = tf.concat([tensor_list, node_tensor], 0) i = tf.add(i, 1) return i, tensor_list # i is the index (over data stored in the placeholders) # identical type[out] = type[in]; can be used in while_loop # so initial case iteration -> puts left most leaf on the tensorarray (and increments i) # next iteration -> puts next left most (leaf on stack) and increments i # .... 
# until all the leaves are on the stack in the correct order # starts combining the leaves after and adding to the stack def construct_tensor_array(self): loop_condition = lambda i, tensor_array: \ tf.less(i, tf.squeeze(tf.shape(self.is_a_leaf))) # tf.squeeze(tf.shape(placeholder)) <--> length of the storage of all leaves left_most_element = self._embed_word(tf.gather(self.word_index, 0)) # index is 1 i1 = tf.constant(1, dtype=tf.int32) while_loop_op = tf.while_loop(cond=loop_condition, body=self._loop_over_tree, loop_vars=[i1, left_most_element], shape_invariants=[ i1.get_shape(), tf.TensorShape( [None, self.config.embed_size]) ]) return while_loop_op[1] def inference_op(self, predict_only_root=False): if predict_only_root: return self.root_logits_op() return self.logits_op() def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data( 700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' with tf.variable_scope('Composition') as scope: ### YOUR CODE HERE embedding = tf.get_variable( "embedding", [self.vocab.total_words, self.config.embed_size]) W1 = tf.get_variable( "W1", [2 * self.config.embed_size, self.config.embed_size]) b1 = tf.get_variable("b1", [1, self.config.embed_size]) l2_loss = tf.nn.l2_loss(W1) tf.add_to_collection(name="l2_loss", value=l2_loss) variable_summaries(embedding, embedding.name) variable_summaries(W1, W1.name) variable_summaries(b1, b1.name) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE U = tf.get_variable( "U", [self.config.embed_size, self.config.label_size]) bs = tf.get_variable("bs", [1, self.config.label_size]) variable_summaries(U, U.name) variable_summaries(bs, bs.name) l2_loss = tf.nn.l2_loss(U) tf.add_to_collection(name="l2_loss", value=l2_loss) ### END YOUR CODE def add_model(self): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by it's parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). 
Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ if self.tensor_array_op is None: self.tensor_array_op = self.construct_tensor_array() return self.tensor_array_op def add_projections_op(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope("Projection", reuse=True): U = tf.get_variable("U") bs = tf.get_variable("bs") logits = tf.matmul(node_tensors, U) + bs ### END YOUR CODE return logits def logits_op(self): # this is an operation on the updated tensor_array if self.logits is None: self.logits = self.add_projections_op(self.tensor_array_op) return self.logits def root_logits_op(self): # construct once if self.root_logits is None: root_node = tf.expand_dims(self.tensor_array_op[-1, :], 0) self.root_logits = self.add_projections_op(root_node) return self.root_logits def root_prediction_op(self): if self.root_predict is None: self.root_predict = tf.squeeze(tf.argmax(self.root_logits_op(), 1)) return self.root_predict def full_loss_op(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ if self.full_loss is None: loss = None # YOUR CODE HERE l2_loss = self.l2_reg * tf.add_n(tf.get_collection("l2_loss")) idx = tf.where(tf.less(self.labelholder, 2)) logits = tf.gather(logits, idx) labels = tf.gather(labels, idx) objective_loss = tf.reduce_sum( tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)) loss = objective_loss + l2_loss tf.summary.scalar(name="loss_l2", tensor=l2_loss) tf.summary.scalar(name="loss_objective", tensor=tf.reduce_sum(objective_loss)) tf.summary.scalar(name="loss_total", tensor=loss) self.full_loss = loss # END YOUR CODE return self.full_loss def loss_op(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ if self.root_loss is None: # construct once guard loss = None # YOUR CODE HERE l2_loss = self.l2_reg * tf.add_n(tf.get_collection("l2_loss")) objective_loss = tf.reduce_sum( tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)) loss = objective_loss + l2_loss tf.summary.scalar(name="root_loss_l2", tensor=l2_loss) tf.summary.scalar(name="root_loss_objective", tensor=tf.reduce_sum(objective_loss)) tf.summary.scalar(name="root_loss_total", tensor=loss) self.root_loss = loss # END YOUR CODE return self.root_loss def get_saver(self): if self.saver is None: print("Creating Saver;") self.saver = tf.train.Saver() return self.saver def get_best_saver(self): if self.best_saver is None: print("Creating Best Saver (keeps only one checkpoint);") self.best_saver = tf.train.Saver(max_to_keep=1, name="best_saver") return self.best_saver def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. 
See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. """ if self.training_op is None: # YOUR CODE HERE optimizer = tf.train.AdamOptimizer( self.learning_rate ) # tf.train.GradientDescentOptimizer(self.config.lr) # optimizer = tf.train.AdamOptimizer(self.config.lr) tf.summary.scalar("lr", self.learning_rate) self.training_op = optimizer.minimize(loss, global_step=global_step) # END YOUR CODE return self.training_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ if self.prediction is None: # YOUR CODE HERE self.prediction = tf.argmax(y, dimension=1) # END YOUR CODE return self.prediction def build_feed_dict(self, in_node, is_training=True): nodes_list = [] tr.leftTraverse(in_node, lambda node, args: args.append(node), nodes_list) if is_training: feed_dict = { self.is_a_leaf: [n.isLeaf for n in nodes_list], self.left_child: [ nodes_list.index(n.left) if not n.isLeaf else -1 for n in nodes_list ], self.right_child: [ nodes_list.index(n.right) if not n.isLeaf else -1 for n in nodes_list ], self.word_index: [ self.vocab.encode(n.word) if n.word else -1 for n in nodes_list ], self.labelholder: [n.label for n in nodes_list], self.learning_rate: self.config.lr, self.l2_reg: self.config.l2 } else: feed_dict = { self.is_a_leaf: [n.isLeaf for n in nodes_list], self.left_child: [ nodes_list.index(n.left) if not n.isLeaf else -1 for n in nodes_list ], self.right_child: [ nodes_list.index(n.right) if not n.isLeaf else -1 for n in nodes_list ], self.word_index: [ self.vocab.encode(n.word) if n.word else -1 for n in nodes_list ], self.labelholder: [n.label for n in nodes_list], self.learning_rate: self.config.lr, self.l2_reg: 0. 
} return feed_dict def predict(self, trees, sess, load_weights=False, get_loss=False, is_training=True): """Make predictions from the provided model.""" results = [] losses = [] if load_weights is False: print("using current session weights for prediction") else: print("Loading weights from (best weights);") best_saver = self.get_best_saver() ckpt = tf.train.get_checkpoint_state('./weights/best') if ckpt and ckpt.model_checkpoint_path: # print(best_saver.last_checkpoints[-1]) best_saver.restore(sess, best_saver.last_checkpoints[-1]) # print(ckpt.model_checkpoint_path) # print(tf.report_uninitialized_variables(tf.global_variables())) logits = self.root_logits_op() # evaluation is based upon the root node root_loss = self.loss_op(logits=logits, labels=self.labelholder[-1:]) root_prediction_op = self.root_prediction_op() for t in trees: feed_dict = self.build_feed_dict(t.root, is_training) if get_loss: root_prediction, loss = sess.run( [root_prediction_op, root_loss], feed_dict=feed_dict) losses.append(loss) results.append(root_prediction) else: root_prediction = sess.run(root_prediction_op, feed_dict=feed_dict) results.append(root_prediction) return results, losses # need to rework this: (OP creation needs to be made independent of using OPs) def run_epoch(self, sess, summary_writer, new_model=False, verbose=True, epoch=0): loss_history = [] random.shuffle(self.train_data) saver = self.get_saver() add_model_op = self.add_model() logits = self.logits_op() loss = self.full_loss_op(logits=logits, labels=self.labelholder) train_op = self.training(loss) if new_model: init = tf.global_variables_initializer() self.merged_summaries = tf.summary.merge_all() sess.run(init) # else: # # # ckpt = tf.train.get_checkpoint_state('./weights') # if ckpt and ckpt.model_checkpoint_path: # saver.restore(sess, ckpt.model_checkpoint_path) # print(tf.report_uninitialized_variables(tf.global_variables())) # #sess.run(tf.variable_initializer(tf.report_uninitialized_variables(tf.all_variables()))) for step, tree in enumerate(self.train_data): feed_dict = self.build_feed_dict(tree.root, is_training=True) loss_value, _ = sess.run([loss, train_op], feed_dict=feed_dict) merged, current_step = sess.run( [self.merged_summaries, global_step], feed_dict=feed_dict) summary_writer.add_summary(merged, global_step=current_step) loss_history.append(loss_value) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step + 1, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() if not os.path.exists("./weights"): os.makedirs("./weights") # print('./weights/%s.temp'%self.config.model_name) print("\nSaving %s" % self.config.model_name) out_file = saver.save(sess, './weights/%s.cpkt' % self.config.model_name, global_step=global_step) print("File out: ", out_file) # print(saver.last_checkpoints) train_preds, _ = self.predict(self.train_data, sess) val_preds, val_losses = self.predict(self.dev_data, sess, get_loss=True, is_training=False) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print() print('Training acc (only root node): {}'.format(train_acc)) print('Validation acc (only root node): {}'.format(val_acc)) print(self.make_conf(train_labels, train_preds)) print(self.make_conf(val_labels, val_preds)) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, sess, verbose=True): best_saver = self.get_best_saver() complete_loss_history = 
[] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') best_val_loss = float('inf') best_val_epoch = 0 stopped = self.config.max_epochs # default stop location # probably can remove initialization to here summary_writer = tf.summary.FileWriter('rnn_logs/test_log/', sess.graph) for epoch in range(self.config.max_epochs): print('epoch %d' % epoch) if epoch == 0: train_acc, val_acc, loss_history, val_loss = self.run_epoch( new_model=True, epoch=epoch, sess=sess, verbose=verbose, summary_writer=summary_writer) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch( epoch=epoch, sess=sess, verbose=verbose, summary_writer=summary_writer) complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) # lr annealing epoch_loss = np.mean(loss_history) if epoch_loss > prev_epoch_loss * self.config.anneal_threshold: self.config.lr /= self.config.anneal_by print('annealed lr to %f' % self.config.lr) prev_epoch_loss = epoch_loss # save if model has improved on val print("validation loss: %f; prior_best: %f (Epoch %d)" % (val_loss, best_val_loss, best_val_epoch)) if val_loss < best_val_loss: if not os.path.exists("./weights/best"): os.makedirs("./weights/best") best_saver.save(sess, './weights/best/%s.cpkt' % (self.config.model_name), global_step=global_step) print( "saving new (best) checkpoint; (Epoch %d) \nFile: " % epoch, best_saver.last_checkpoints[-1]) best_val_loss = val_loss best_val_epoch = epoch # if model has not imprvoved for a while stop if epoch - best_val_epoch > self.config.early_stopping: stopped = epoch break if verbose: sys.stdout.write('\r') sys.stdout.flush() print('\n\nstopped at %d\n' % stopped) return { 'loss_history': complete_loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def make_conf(self, labels, predictions): confmat = np.zeros([2, 2]) for l, p in zip(labels, predictions): confmat[l, p] += 1 return confmat
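# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original code): the learning-rate annealing
# and early-stopping bookkeeping used by train() above, isolated from the
# TensorFlow details. `run_one_epoch` is a hypothetical callback standing in
# for self.run_epoch; the keyword defaults mirror the config names used above.
# ---------------------------------------------------------------------------
def anneal_and_early_stop(run_one_epoch, max_epochs, lr,
                          anneal_threshold=0.99, anneal_by=1.5, early_stopping=2):
    prev_epoch_loss = float('inf')
    best_val_loss = float('inf')
    best_val_epoch = 0
    for epoch in range(max_epochs):
        epoch_loss, val_loss = run_one_epoch(lr)
        # anneal the learning rate when training loss stops improving enough
        if epoch_loss > prev_epoch_loss * anneal_threshold:
            lr /= anneal_by
        prev_epoch_loss = epoch_loss
        # remember the best validation loss seen so far
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_val_epoch = epoch
        # stop when validation has not improved for `early_stopping` epochs
        if epoch - best_val_epoch > early_stopping:
            return epoch
    return max_epochs

# Toy usage: validation loss stops improving after epoch 1, so training stops early.
# _demo = iter([(1.0, 0.8), (0.9, 0.7), (0.85, 0.75), (0.8, 0.76), (0.78, 0.77)])
# anneal_and_early_stop(lambda lr: next(_demo), max_epochs=5, lr=0.01)  # -> 4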
class LSTM_Model(LanguageModel): def load_data(self, debug=False): self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) #self.encoded_test = np.array( #[self.vocab.encode(word) for word in get_ptb_dataset('test')], #dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] # load the training data self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug]
def add_placeholders(self): self.input_placeholder = tf.placeholder(tf.int32, (None, self.config.num_steps)) self.labels_placeholder = tf.placeholder(tf.float32, (None, self.config.num_steps)) self.dropout_placeholder = tf.placeholder(tf.float32)
def add_embedding(self): # convert one-hot word indices into word vectors inputs = [] with tf.device('/cpu:0'): L = tf.get_variable("Embedding", (len(self.vocab), self.config.embed_size)) tensors = tf.nn.embedding_lookup(L, self.input_placeholder) split_tensors = tf.split(1, self.config.num_steps, tensors) for tensor in split_tensors: inputs.append(tf.squeeze(tensor, [1])) return inputs # returns a list
def add_projection(self, rnn_outputs): # map the hidden states back to the vocabulary with tf.variable_scope("projection"): U = tf.get_variable("U", shape=(self.config.hidden_size, len(self.vocab))) b = tf.get_variable("b", shape=(len(self.vocab), )) outputs = [tf.matmul(x, U) + b for x in rnn_outputs] ###softmax? return outputs
def add_loss_op(self, output): # compute the loss loss = sequence_loss( [output], [tf.reshape(self.labels_placeholder, [-1])], [tf.ones([self.config.batch_size * self.config.num_steps])]) return loss
def add_training_op(self, loss): # optimize the loss optimizer = tf.train.AdamOptimizer(self.config.lr) train_op = optimizer.minimize(loss) return train_op
def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.lstm_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.lstm_outputs) self.predictions = [ tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs ] output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss)
def add_model(self, inputs): hidden_size = self.config.hidden_size embed_size = self.config.embed_size batch_size = self.config.batch_size with tf.variable_scope("LSTM"): U_i = tf.get_variable("U_i", shape=(hidden_size, hidden_size)) U_f = tf.get_variable("U_f", shape=(hidden_size, hidden_size)) U_o = tf.get_variable("U_o", shape=(hidden_size, hidden_size)) U_c = tf.get_variable("U_c", shape=(hidden_size, hidden_size)) W_i = tf.get_variable("W_i", shape=(embed_size, hidden_size)) W_f = tf.get_variable("W_f", shape=(embed_size, hidden_size)) W_o = tf.get_variable("W_o", shape=(embed_size, hidden_size)) W_c = tf.get_variable("W_c", shape=(embed_size, hidden_size)) self.initial_state = tf.zeros([batch_size, hidden_size * 2]) pre_h, pre_c = tf.split(1, 2, self.initial_state) lstm_outputs = [] for step in inputs: step = tf.nn.dropout(step, self.dropout_placeholder) i_t = tf.sigmoid(tf.matmul(step, W_i) + tf.matmul(pre_h, U_i)) f_t = tf.sigmoid(tf.matmul(step, W_f) + tf.matmul(pre_h, U_f)) o_t = tf.sigmoid(tf.matmul(step, W_o) + tf.matmul(pre_h, U_o)) c_t = tf.tanh(tf.matmul(step, W_c) + tf.matmul(pre_h, U_c)) pre_c = f_t * pre_c + i_t * c_t pre_h = o_t * tf.tanh(pre_c) lstm_outputs.append(tf.nn.dropout(pre_h, self.dropout_placeholder)) self.final_state = tf.concat(1, [pre_h, pre_c]) return lstm_outputs
def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) # total number of iterations total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): feed = { self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp } loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
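# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the model above): the gate equations that
# LSTM_Model.add_model() unrolls, written with NumPy. As in the code above,
# the gates carry no bias terms; all shapes and the toy data are assumptions.
# ---------------------------------------------------------------------------
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step(x_t, pre_h, pre_c, W, U):
    """One LSTM step: W maps the input, U maps the previous hidden state."""
    i_t = _sigmoid(x_t @ W['i'] + pre_h @ U['i'])   # input gate
    f_t = _sigmoid(x_t @ W['f'] + pre_h @ U['f'])   # forget gate
    o_t = _sigmoid(x_t @ W['o'] + pre_h @ U['o'])   # output gate
    c_hat = np.tanh(x_t @ W['c'] + pre_h @ U['c'])  # candidate cell state
    c_t = f_t * pre_c + i_t * c_hat                 # new cell state
    h_t = o_t * np.tanh(c_t)                        # new hidden state
    return h_t, c_t

def lstm_sketch(batch=2, embed=3, hidden=4, steps=5):
    rng = np.random.default_rng(0)
    W = {k: rng.normal(size=(embed, hidden)) for k in 'ifoc'}
    U = {k: rng.normal(size=(hidden, hidden)) for k in 'ifoc'}
    h = c = np.zeros((batch, hidden))
    for _ in range(steps):
        h, c = lstm_step(rng.normal(size=(batch, embed)), h, c, W, U)
    return h, c   # both (batch, hidden), analogous to pre_h / pre_c above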
class Model(): def __init__(self, config): self.config = config self.load_data(debug=False) self.build_model() def load_vocab(self,debug): self.vocab = Vocab() if debug: self.vocab.construct(get_words_dataset('dev')) else: self.vocab.construct(get_words_dataset('train')) self.vocab.build_embedding_matrix(self.config.word_embed_size) self.embedding_matrix = self.vocab.embedding_matrix def load_data(self, debug=False): """ Loads starter word-vectors and train/dev/test data. """ self.load_vocab(debug) config = self.config if debug: # Load the training set train_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'dev', 'post')) ( self.sent1_train, self.sent2_train, self.len1_train, self.len2_train, self.y_train ) = zip(*train_data) self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train) self.len1_train, self.len2_train = ( np.array(self.len1_train), np.array(self.len2_train) ) self.y_train = np.array(self.y_train) print('# training examples: %d' %len(self.y_train)) # Load the validation set dev_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_dev, self.sent2_dev, self.len1_dev, self.len2_dev, self.y_dev ) = zip(*dev_data) self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev) self.len1_dev, self.len2_dev = ( np.array(self.len1_dev), np.array(self.len2_dev) ) self.y_dev = np.array(self.y_dev) print('# dev examples: %d' %len(self.y_dev)) # Load the test set test_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_test, self.sent2_test, self.len1_test, self.len2_test, self.y_test ) = zip(*test_data) self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test) self.len1_test, self.len2_test = ( np.array(self.len1_test), np.array(self.len2_test) ) self.y_test = np.array(self.y_test) print('# test examples: %d' %len(self.y_test)) else: # Load the training set train_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'train', 'post')) ( self.sent1_train, self.sent2_train, self.len1_train, self.len2_train, self.y_train ) = zip(*train_data) self.sent1_train, self.sent2_train = np.vstack(self.sent1_train), np.vstack(self.sent2_train) self.len1_train, self.len2_train = ( np.array(self.len1_train), np.array(self.len2_train) ) self.y_train = np.array(self.y_train) print('# training examples: %d' %len(self.y_train)) # Load the validation set dev_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'dev', 'post')) ( self.sent1_dev, self.sent2_dev, self.len1_dev, self.len2_dev, self.y_dev ) = zip(*dev_data) self.sent1_dev, self.sent2_dev = np.vstack(self.sent1_dev), np.vstack(self.sent2_dev) self.len1_dev, self.len2_dev = ( np.array(self.len1_dev), np.array(self.len2_dev) ) self.y_dev = np.array(self.y_dev) print('# dev examples: %d' %len(self.y_dev)) # Load the test set test_data = list(get_sentences_dataset(self.vocab, config.sent_len, 'test', 'post')) ( self.sent1_test, self.sent2_test, self.len1_test, self.len2_test, self.y_test ) = zip(*test_data) self.sent1_test, self.sent2_test = np.vstack(self.sent1_test), np.vstack(self.sent2_test) self.len1_test, self.len2_test = ( np.array(self.len1_test), np.array(self.len2_test) ) self.y_test = np.array(self.y_test) print('# test examples: %d' %len(self.y_test)) print('min len: ', np.min(self.len2_train)) def build_model(self): config = self.config k = config.sentence_embed_size L = config.sent_len # input tensors self.sent1_ph = 
tf.placeholder(tf.int32, shape=[None, L], name='sent1') self.sent2_ph = tf.placeholder(tf.int32, shape=[None, L], name='sent2') self.len1_ph = tf.placeholder(tf.int32, shape=[None], name='len1') self.len2_ph = tf.placeholder(tf.int32, shape=[None], name='len2') self.labels_ph = tf.placeholder(tf.float32, shape=[None, config.label_size], name='label') self.kp_ph = tf.placeholder(tf.float32, name='kp') kp = self.kp_ph # set embedding matrix to pretrained embedding init_embeds = tf.constant(self.embedding_matrix, dtype='float32') word_embeddings = tf.get_variable( dtype='float32', name='word_embeddings', initializer=init_embeds, trainable=False) # no fine-tuning of word embeddings x1 = tf.nn.embedding_lookup(word_embeddings, self.sent1_ph) x2 = tf.nn.embedding_lookup(word_embeddings, self.sent2_ph) x1, x2 = tf.nn.dropout(x1, kp), tf.nn.dropout(x2, kp) def lstmn(x, length, scope): with tf.variable_scope(scope): W_h = tf.get_variable(name='W_h', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_hs = tf.get_variable(name='W_hs', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_x = tf.get_variable(name='W_x', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_M = tf.get_variable(name='b_M', initializer=tf.zeros([L, k])) w = tf.get_variable(name='w', shape=[k, 1], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_a = tf.get_variable(name='b_a', initializer=tf.zeros([L])) W_rnn_h_i = tf.get_variable(name='W_rnn_h_i', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_rnn_x_i = tf.get_variable(name='W_rnn_x_i', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_rnn_i = tf.get_variable(name='b_rnn_i', initializer=tf.zeros([k])) W_rnn_h_f = tf.get_variable(name='W_rnn_h_f', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_rnn_x_f = tf.get_variable(name='W_rnn_x_f', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_rnn_f = tf.get_variable(name='b_rnn_f', initializer=tf.zeros([k])) W_rnn_h_o = tf.get_variable(name='W_rnn_h_o', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_rnn_x_o = tf.get_variable(name='W_rnn_x_o', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_rnn_o = tf.get_variable(name='b_rnn_o', initializer=tf.zeros([k])) W_rnn_h_c = tf.get_variable(name='W_rnn_h_c', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) W_rnn_x_c = tf.get_variable(name='W_rnn_x_c', shape=[k, k], regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_rnn_c = tf.get_variable(name='b_rnn_c', initializer=tf.zeros([k])) c0 = tf.zeros([tf.shape(length)[0], k]) h0 = tf.zeros([tf.shape(length)[0], k]) hst_1 = tf.zeros([tf.shape(length)[0], k]) Cl, Hl = [c0], [h0] for t in range(L): Ct_1 = tf.stack(Cl, axis=1) Ht_1 = tf.stack(Hl, axis=1) H_mod = tf.reshape(Ht_1, [-1, k]) xt = x[:,t,:] Xt = tf.reshape(tf.tile(xt, [1, t+1]), [-1, t+1, k]) Xt_mod = tf.reshape(Xt, [-1, k]) Hst_1 = tf.reshape(tf.tile(hst_1, [1, t+1]), [-1, t+1, k]) Hst_1_mod = tf.reshape(Hst_1, [-1, k]) Mt = tf.nn.tanh( tf.reshape(tf.matmul(H_mod, W_h), [-1, t+1, k]) + tf.reshape(tf.matmul(Xt_mod, W_x), [-1, t+1, k]) + tf.reshape(tf.matmul(Hst_1_mod, W_hs), [-1, t+1, k]) + b_M[:t+1]) Mt_w = tf.matmul(tf.reshape(Mt, [-1, k]), w) alphat = tf.nn.softmax(tf.reshape(Mt_w, [-1, 1, t+1]) + b_a[:t+1]) cst = tf.reshape(tf.matmul(alphat, Ct_1), [-1, k]) hst = tf.reshape(tf.matmul(alphat, Ht_1), [-1, k]) hst_1 = hst it = 
tf.sigmoid(tf.matmul(hst, W_rnn_h_i) + tf.matmul(xt, W_rnn_x_i) + b_rnn_i) ft = tf.sigmoid(tf.matmul(hst, W_rnn_h_f) + tf.matmul(xt, W_rnn_x_f) + b_rnn_f) ot = tf.sigmoid(tf.matmul(hst, W_rnn_h_o) + tf.matmul(xt, W_rnn_x_o) + b_rnn_o) cht = tf.nn.tanh(tf.matmul(hst, W_rnn_h_c) + tf.matmul(xt, W_rnn_x_c) + b_rnn_c) ct = ft*cst + it*cht ht = ot*tf.nn.tanh(ct) Cl.append(ct) Hl.append(ht) return ( tf.transpose(tf.stack(Hl), [1, 0, 2]), tf.transpose(tf.stack(Cl), [1, 0, 2]) ) H1, _ = lstmn(x1, self.len1_ph, 'lstmn1') H2, _ = lstmn(x2, self.len2_ph, 'lstmn2') def get_last_relevant_output(out, seq_len): rng = tf.range(0, tf.shape(seq_len)[0]) indx = tf.stack([rng, seq_len - 1], 1) last = tf.gather_nd(out, indx) return last h1 = get_last_relevant_output(H1, self.len1_ph) h2 = get_last_relevant_output(H2, self.len2_ph) h_s = tf.concat([h1, h2], 1) y = h_s # MLP classifier on top hidden_sizes = config.hidden_sizes for layer, size in enumerate(hidden_sizes): if layer > 0: previous_size = hidden_sizes[layer-1] else: previous_size = 2*k W = tf.get_variable(name='W{}'.format(layer), shape=[previous_size, size], initializer=tf.contrib.layers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b = tf.get_variable(name='b{}'.format(layer), initializer=tf.zeros([size])) y = tf.nn.relu(tf.matmul(y, W) + b) W_softmax = tf.get_variable(name='W_softmax', shape=[hidden_sizes[-1], config.label_size], initializer=tf.contrib.layers.xavier_initializer(), regularizer=tf.contrib.layers.l2_regularizer(config.l2)) b_softmax = tf.get_variable(name='b_softmax', initializer=tf.zeros([config.label_size])) logits = tf.matmul(y, W_softmax) + b_softmax cross_entropy_loss = tf.reduce_mean( tf.losses.softmax_cross_entropy(self.labels_ph, logits) ) reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) self.loss = cross_entropy_loss #+ tf.add_n(reg_losses) self.train_op = ( tf.train.AdamOptimizer(learning_rate=config.lr) .minimize(self.loss) ) self.probs = tf.nn.softmax(logits) self.predictions = tf.argmax(self.probs, 1) correct_prediction = tf.equal( tf.argmax(self.labels_ph, 1), self.predictions) self.correct_predictions = tf.reduce_sum(tf.cast(correct_prediction, 'int32')) def create_feed_dict(self, sent1_batch, sent2_batch, len1_batch, len2_batch, label_batch, keep_prob): feed_dict = { self.sent1_ph: sent1_batch, self.sent2_ph: sent2_batch, self.len1_ph: len1_batch, self.len2_ph: len2_batch, self.labels_ph: label_batch, self.kp_ph: keep_prob } return feed_dict def run_epoch(self, session, sent1_data, sent2_data, len1_data, len2_data, input_labels, verbose=100): orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y = ( sent1_data, sent2_data, len1_data, len2_data, input_labels ) kp = self.config.kp total_loss = [] total_correct_examples = 0 total_processed_examples = 0 total_steps = int( orig_sent1.shape[0] / self.config.batch_size) for step, (sent1, sent2, len1, len2, y) in enumerate( data_iterator(orig_sent1, orig_sent2, orig_len1, orig_len2, orig_y, batch_size=self.config.batch_size, label_size=self.config.label_size)): feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp) loss, total_correct, _ = session.run( [self.loss, self.correct_predictions, self.train_op], feed_dict=feed) total_processed_examples += len(y) total_correct_examples += total_correct total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : loss = {}'.format( step, total_steps, np.mean(total_loss))) sys.stdout.flush() if verbose: sys.stdout.write('\r') sys.stdout.flush() return 
np.mean(total_loss), total_correct_examples / float(total_processed_examples), total_loss def predict(self, session, sent1_data, sent2_data, len1_data, len2_data, y=None): """Make predictions from the provided model.""" # If y is given, the loss is also calculated # We deactivate dropout by setting it to 1 kp = 1.0 losses = [] results = [] if np.any(y): data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, y, batch_size=self.config.batch_size, label_size=self.config.label_size, shuffle=False) else: data = data_iterator(sent1_data, sent2_data, len1_data, len2_data, batch_size=self.config.batch_size, label_size=self.config.label_size, shuffle=False) for step, (sent1, sent2, len1, len2, y) in enumerate(data): feed = self.create_feed_dict(sent1, sent2, len1, len2, y, kp) if np.any(y): loss, preds = session.run( [self.loss, self.predictions], feed_dict=feed) losses.append(loss) else: preds = session.run(self.predictions, feed_dict=feed) results.extend(preds) return np.mean(losses), np.array(results)
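# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original code): the intra-attention step at
# the heart of lstmn() above. For a single example, the current input x_t
# attends over the tape of previous hidden/cell states (H, C) to produce the
# attentive summaries h~_t and c~_t that feed the gates. Biases are omitted and
# all shapes/toy values are assumptions made only for this sketch.
# ---------------------------------------------------------------------------
import numpy as np

def _softmax(z):
    z = z - z.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

def attend_over_tape(H, C, x_t, hs_prev, W_h, W_x, W_hs, w):
    # H, C: (t, k) tapes of previous hidden / cell states
    # x_t: (k,) current input; hs_prev: (k,) previous attentive hidden state
    M = np.tanh(H @ W_h + x_t @ W_x + hs_prev @ W_hs)   # (t, k), as M_t above
    alpha = _softmax(M @ w)                              # (t,) attention weights
    h_tilde = alpha @ H                                  # attentive hidden summary
    c_tilde = alpha @ C                                  # attentive cell summary
    return h_tilde, c_tilde, alpha

def lstmn_attention_sketch(k=4, t=3):
    rng = np.random.default_rng(0)
    W_h, W_x, W_hs = (rng.normal(size=(k, k)) for _ in range(3))
    w = rng.normal(size=k)
    H, C = rng.normal(size=(t, k)), rng.normal(size=(t, k))
    h_tilde, c_tilde, alpha = attend_over_tape(
        H, C, rng.normal(size=k), np.zeros(k), W_h, W_x, W_hs, w)
    return alpha.sum()   # ~1.0: the weights over the tape form a distribution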
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) #self.encoded_test = np.array( #[self.vocab.encode(word) for word in get_ptb_dataset('test')], #dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] # load the training data self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug]
def add_placeholders(self): self.input_placeholder = tf.placeholder(tf.int32, (None, self.config.num_steps)) self.labels_placeholder = tf.placeholder(tf.float32, (None, self.config.num_steps)) self.dropout_placeholder = tf.placeholder(tf.float32)
def add_embedding(self): # convert one-hot word indices into word vectors inputs = [] with tf.device('/cpu:0'): L = tf.get_variable("Embedding", (len(self.vocab), self.config.embed_size)) tensors = tf.nn.embedding_lookup(L, self.input_placeholder) split_tensors = tf.split(1, self.config.num_steps, tensors) for tensor in split_tensors: inputs.append(tf.squeeze(tensor, [1])) return inputs # returns a list
def add_projection(self, rnn_outputs): # map the hidden states back to the vocabulary with tf.variable_scope("projection"): U=tf.get_variable("U",shape=(self.config.hidden_size,len(self.vocab))) b_2=tf.get_variable("b_2",shape=(len(self.vocab),)) outputs=[tf.matmul(x,U)+b_2 for x in rnn_outputs] ###softmax? return outputs
def add_loss_op(self, output): # compute the loss loss = sequence_loss([output], [tf.reshape(self.labels_placeholder, [-1])], [tf.ones([self.config.batch_size * self.config.num_steps])]) return loss
def add_training_op(self, loss): # optimize the loss optimizer=tf.train.AdamOptimizer(self.config.lr) train_op=optimizer.minimize(loss) return train_op
def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs] output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss)
def add_model(self, inputs): hidden_size=self.config.hidden_size embed_size=self.config.embed_size batch_size=self.config.batch_size with tf.variable_scope("RNN"): H=tf.get_variable("H",shape=(hidden_size,hidden_size)) I=tf.get_variable("I",shape=(embed_size,hidden_size)) b_1=tf.get_variable("b_1",shape=(hidden_size,)) self.initial_state=tf.zeros([batch_size,hidden_size]) pre_h=self.initial_state rnn_outputs=[] for step in inputs: step=tf.nn.dropout(step,self.dropout_placeholder) pre_h=tf.sigmoid(tf.matmul(pre_h,H)+tf.matmul(step,I)+b_1) rnn_outputs.append(tf.nn.dropout(pre_h,self.dropout_placeholder)) self.final_state=pre_h return rnn_outputs
def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) # total number of iterations total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): feed = {self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp} loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
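# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the model above): what add_embedding() does,
# expressed with NumPy. A (batch, num_steps) matrix of word ids is looked up in
# the embedding matrix and split into a per-time-step list of (batch, embed)
# arrays, which is the input format add_model() expects. Sizes are assumptions.
# ---------------------------------------------------------------------------
import numpy as np

def embed_and_split(word_ids, L):
    embedded = L[word_ids]                               # (batch, num_steps, embed)
    return [embedded[:, t, :] for t in range(word_ids.shape[1])]

# Toy usage:
# rng = np.random.default_rng(0)
# L = rng.normal(size=(10, 4))                 # vocab_size=10, embed_size=4
# ids = rng.integers(0, 10, size=(2, 3))       # batch=2, num_steps=3
# inputs = embed_and_split(ids, L)             # list of 3 arrays of shape (2, 4)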
class Model_RNN(LanguageModel): def load_data(self): pair_fname = '../lastfm_train_mappings.txt' lyrics_path = '../lyrics/data/lyrics/train/' # X_train is a list of all examples. each examples is a 2-len list. each element is a list of words in lyrics. # word_counts is a dictionary that maps X_train, l_train, self.word_counts, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold=100, n_class=self.config.n_class) self.labels_train = np.zeros((len(X_train),self.config.n_class)) self.labels_train[range(len(X_train)),l_train] = 1 self.vocab = Vocab() self.vocab.construct(self.word_counts.keys()) self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps)) for i in range(len(X_train)): self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]] self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]] def add_placeholders(self): self.X1 = tf.placeholder(tf.int32, shape=(None, self.config.max_steps), name='X1') self.X2 = tf.placeholder(tf.int32, shape=(None, self.config.max_steps), name='X2') self.labels = tf.placeholder(tf.float32, shape=(None, self.config.n_class), name='labels') #self.initial_state = tf.placeholder(tf.float32, shape=(None, self.config.hidden_size), name='initial_state') self.seq_len1 = tf.placeholder(tf.int32, shape=(None), name='seq_len1') # for variable length sequences self.seq_len2 = tf.placeholder(tf.int32, shape=(None), name='seq_len2') # for variable length sequences def add_embedding(self): L = tf.get_variable('L', shape=(len(self.word_counts.keys()), self.config.embed_size), dtype=tf.float32) inputs1 = tf.nn.embedding_lookup(L, self.X1) # self.X1 is batch_size x self.config.max_steps inputs2 = tf.nn.embedding_lookup(L, self.X2) # input2 is batch_size x self.config.max_steps x self.config.embed_size inputs1 = tf.split(1, self.config.max_steps, inputs1) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size inputs1 = [tf.squeeze(x) for x in inputs1] inputs2 = tf.split(1, self.config.max_steps, inputs2) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size inputs2 = [tf.squeeze(x) for x in inputs2] print 'onh' print inputs1[0].get_shape return inputs1, inputs2 def add_model(self, inputs1, inputs2, seq_len1, seq_len2): #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32) print 'adsf add_model' self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32) rnn_outputs = [] rnn_outputs1 = [] rnn_outputs2 = [] h_curr1 = self.initial_state h_curr2 = self.initial_state print 'nthgnghn' with tf.variable_scope('rnn'): Whh = tf.get_variable('Whh', shape=(self.config.hidden_size,self.config.hidden_size), dtype=tf.float32) Wxh = tf.get_variable('Wxh', shape=(self.config.embed_size,self.config.hidden_size), dtype=tf.float32) b1 = tf.get_variable('bhx', shape=(self.config.hidden_size,), dtype=tf.float32) print Wxh.get_shape print inputs1[0].get_shape print inputs2[0].get_shape for i in range(self.config.max_steps): h_curr2 = tf.matmul(h_curr2,Whh) h_curr2 += tf.matmul(inputs2[i],Wxh) h_curr2 += b1 h_curr2 = tf.sigmoid(h_curr2) h_curr1 = tf.sigmoid(tf.matmul(h_curr1,Whh) + tf.matmul(inputs1[i],Wxh) + b1) rnn_outputs1.append(h_curr1) rnn_outputs2.append(h_curr2) rnn_states = [tf.concat(1, [rnn_outputs1[i], rnn_outputs2[i]]) for i in 
range(self.config.max_steps)] return rnn_states def add_projection(self, rnn_states): # rnn_outputs is a list of length batch_size of lengths = seq_len. Where each list element is ??. I think. Whc = tf.get_variable('Whc', shape=(2*self.config.hidden_size,self.config.n_class)) bhc = tf.get_variable('bhc', shape=(self.config.n_class,)) projections = tf.matmul(rnn_states[-1],Whc) + bhc # in case we stop short sequences, the rnn_state in further time_steps should be unch return projections def add_loss_op(self, y): loss = tf.nn.softmax_cross_entropy_with_logits(y, self.labels) loss = tf.reduce_sum(loss) return loss def add_training_op(self, loss): #train_op = tf.train.AdamOptimizer(learning_rate=self.config.lr).minimize(loss) train_op = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr).minimize(loss) return train_op def __init__(self, config): self.config = config self.load_data() self.add_placeholders() print 'adsf __init__' print self.X1.get_shape self.inputs1, self.inputs2 = self.add_embedding() self.rnn_states = self.add_model(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2) self.projections = self.add_projection(self.rnn_states) self.loss = self.add_loss_op(self.projections) self.train_step = self.add_training_op(self.loss) self.predictions = tf.argmax(tf.nn.softmax(self.projections),1) self.correct_predictions = tf.equal(self.predictions,tf.argmax(self.labels,1)) self.correct_predictions = tf.reduce_sum(tf.cast(self.correct_predictions, 'int32')) def run_epoch(self, session, X1, X2, labels, train_op, verbose=10): # X and y are 2D np arrays print 'adsf run_epoch' config = self.config #state = tf.zeros([self.config.batch_size, self.config.hidden_size]) state = self.initial_state.eval() data_len = np.shape(X1)[0] index = np.arange(data_len) np.random.shuffle(index) n_batches = data_len // self.config.batch_size loss = 0.0 for batch_num in range(n_batches): print 'sadf batch_num', str(batch_num) x1_batch = X1[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] x2_batch = X2[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] seq_len_batch1 = [1 for i in range(X1.shape[0])] seq_len_batch2 = [1 for i in range(X1.shape[0])] labels_batch = labels[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]] print 'qwer', x1_batch.shape print 'qwer', x2_batch.shape feed_dict = {self.X1: x1_batch, self.X2: x2_batch, self.labels: labels_batch, self.seq_len1: seq_len_batch1, self.seq_len2: seq_len_batch2} #self.initial_state: state} loss, total_correct, _ = session.run([self.loss, self.correct_predictions, train_op], feed_dict=feed_dict) total_loss.append(loss) if verbose and (batch_num+1)%verbose==0: sys.stdout.write('\r{} / {} : pp = {}'.format(batch_num+1, n_batches, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
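# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original code): the twin-RNN classifier
# that add_model()/add_projection() build above. The same (Whh, Wxh, b) weights
# encode both sequences, the two final hidden states are concatenated, and a
# linear layer produces the class scores. Shapes and toy data are assumptions.
# ---------------------------------------------------------------------------
import numpy as np

def _sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def encode_sequence(inputs, Whh, Wxh, b):
    """Run a plain sigmoid RNN over a list of (batch, embed) inputs."""
    h = np.zeros((inputs[0].shape[0], Whh.shape[0]))
    for x_t in inputs:
        h = _sigmoid(h @ Whh + x_t @ Wxh + b)
    return h                                             # final hidden state

def twin_rnn_sketch(batch=2, embed=3, hidden=4, n_class=5, steps=6):
    rng = np.random.default_rng(0)
    Whh = rng.normal(size=(hidden, hidden))
    Wxh = rng.normal(size=(embed, hidden))
    b = np.zeros(hidden)
    Whc = rng.normal(size=(2 * hidden, n_class))
    bhc = np.zeros(n_class)
    seq1 = [rng.normal(size=(batch, embed)) for _ in range(steps)]
    seq2 = [rng.normal(size=(batch, embed)) for _ in range(steps)]
    h1 = encode_sequence(seq1, Whh, Wxh, b)
    h2 = encode_sequence(seq2, Whh, Wxh, b)
    logits = np.concatenate([h1, h2], axis=1) @ Whc + bhc   # (batch, n_class)
    return logits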
class RNN_Model(): def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data( 300, 70, 100) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) self.w2v_vocab, w2v_embd, embedding_dict = self.load_w2v() self.embedding_dim = len(w2v_embd[0]) self.w2v_vocab_size = len(self.w2v_vocab) self.vocab_size = len(self.vocab) embeddings_tmp = [] for i in range(self.vocab_size): item = self.vocab.decode(i) if item in self.w2v_vocab: embeddings_tmp.append(embedding_dict[item]) # print("Found word {}".format(item)) else: # print("Couldn't find {}.".format(item)) rand_num = np.random.uniform(low=-0.2, high=0.2, size=self.embedding_dim) embeddings_tmp.append(rand_num) self.embed = np.asarray(embeddings_tmp) def inference(self, tree, predict_only_root=True): """For a given tree build the RNN models computation graph up to where it may be used for inference. Args: tree: a Tree object on which to build the computation graph for the RNN Returns: softmax_linear: Output tensor with the computed logits. """ node_tensors = self.add_model(tree.root) if predict_only_root: node_tensors = node_tensors[tree.root] else: node_tensors = [ tensor for node, tensor in node_tensors.items() if node.label != 2 ] node_tensors = tf.concat(node_tensors, 0) return self.add_projections(node_tensors) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. Hint: Look up tf.get_variable ''' with tf.variable_scope('Composition'): ### YOUR CODE HERE # embedding = tf.get_variable( # "embedding", (len(self.vocab), self.config.embed_size)) embedding = tf.get_variable( "embedding", shape=[self.vocab_size, self.embedding_dim], initializer=tf.constant_initializer(self.embed), trainable=False) # embedding = tf.Variable( # tf.constant(0.0, shape=[self.vocab_size, self.embedding_dim]), # trainable=False, # name="embedding") # self.embedding_placeholder = tf.placeholder( # tf.float32, [self.vocab_size, self.embedding_dim]) # self.embedding_init = embedding.assign(self.embedding_placeholder) # embedding = tf.get_variable("embedding", shape=[self.w2v_vocab_size, self.config.embed_size], # initializaer=tf.constant_initializer(self.embed), trainable=False) W1 = tf.get_variable("W1", (self.embedding_dim, self.embedding_dim)) b1 = tf.get_variable("b1", (1, self.embedding_dim)) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE U = tf.get_variable("U", (self.embedding_dim, self.config.label_size)) bs = tf.get_variable("bs", (1, self.config.label_size)) ### END YOUR CODE def add_model(self, node): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. 
Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by its parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ with tf.variable_scope('Composition', reuse=True): ### YOUR CODE HERE embedding = tf.get_variable("embedding") W1 = tf.get_variable("W1") b1 = tf.get_variable("b1") # the variables are already stored in self? ## END YOUR CODE node_tensors = dict() curr_node_tensor = None if node.isLeaf: ### YOUR CODE HERE # word_id = self.vocab.encode(node.word) # embedded_chars = tf.nn.embedding_lookup(embedding, word_id) # curr_node_tensor = tf.unstack(embedded_chars, 1, 1) word_id = self.vocab.encode(node.word) curr_node_tensor = tf.expand_dims(tf.gather(embedding, word_id), 0) ### END YOUR CODE else: node_input = tf.zeros((1, self.embedding_dim)) for child in node.children: node_tensors.update(self.add_model(child)) node_input = tf.add(node_input, node_tensors[child]) ### YOUR CODE HERE curr_node_tensor = tf.nn.relu(tf.matmul(node_input, W1) + b1) ### END YOUR CODE node_tensors[node] = curr_node_tensor return node_tensors def add_projections(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope('Projection', reuse=True): U = tf.get_variable("U") bs = tf.get_variable("bs") logits = tf.matmul(node_tensors, U) + bs ### END YOUR CODE return logits def loss(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ loss = None # YOUR CODE HERE with tf.variable_scope('Composition', reuse=True): W1 = tf.get_variable("W1") with tf.variable_scope('Projection', reuse=True): U = tf.get_variable("U") loss = tf.reduce_sum( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=labels, logits=logits)) + self.config.l2 * tf.nn.l2_loss( W1) + self.config.l2 * tf.nn.l2_loss(U) # END YOUR CODE return loss def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. 
""" train_op = None # YOUR CODE HERE train_op = tf.train.GradientDescentOptimizer( self.config.lr).minimize(loss) # END YOUR CODE return train_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ predictions = None # YOUR CODE HERE predictions = tf.argmax(y, axis=1) # END YOUR CODE return predictions def __init__(self, config): self.config = config self.load_data() def predict(self, trees, weights_path, get_loss=False): """Make predictions from the provided model.""" results = [] losses = [] for i in range(int(math.ceil(len(trees) / float(RESET_AFTER)))): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() # sess.run( # self.embedding_init, # feed_dict={self.embedding_placeholder: self.embed}) saver = tf.train.Saver() saver.restore(sess, weights_path) for tree in trees[i * RESET_AFTER:(i + 1) * RESET_AFTER]: logits = self.inference(tree, True) predictions = self.predictions(logits) root_prediction = sess.run(predictions)[0] if root_prediction == 1: root_prediction = 4 if get_loss: root_label = tree.root.label loss = sess.run(self.loss(logits, [root_label])) losses.append(loss) results.append(root_prediction) return results, losses def run_epoch(self, new_model=False, verbose=True): step = 0 loss_history = [] while step < len(self.train_data): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() # sess.run( # self.embedding_init, # feed_dict={self.embedding_placeholder: self.embed}) if new_model: init = tf.global_variables_initializer() sess.run(init) else: saver = tf.train.Saver() saver.restore(sess, './weights/%s.temp' % self.config.model_name) for _ in range(RESET_AFTER): if step >= len(self.train_data): break tree = self.train_data[step] logits = self.inference(tree) # print(sess.run(logits)) labels = [l for l in tree.labels if l != 2] if labels[0] == 4: labels = [1] # print(labels) loss = self.loss(logits, labels) train_op = self.training(loss) loss, _ = sess.run([loss, train_op]) loss_history.append(loss) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() step += 1 saver = tf.train.Saver() if not os.path.exists("./weights"): os.makedirs("./weights") saver.save(sess, './weights/%s.temp' % self.config.model_name) train_preds, _ = self.predict( self.train_data, './weights/%s.temp' % self.config.model_name) val_preds, val_losses = self.predict(self.dev_data, './weights/%s.temp1' % self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print() print('Training acc (only root node): {}'.format(train_acc)) print('Validation acc (only root node): {}'.format(val_acc)) print('Confusion matrix:') print(self.make_conf(train_labels, train_preds)) print(self.make_conf(val_labels, val_preds)) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, verbose=True): complete_loss_history = [] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') best_val_loss = float('inf') best_val_epoch = 0 stopped = -1 for epoch in range(self.config.max_epochs): print('epoch %d' % epoch) if epoch == 0: train_acc, val_acc, loss_history, val_loss = self.run_epoch( new_model=True) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch() 
complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) #lr annealing epoch_loss = np.mean(loss_history) if epoch_loss > prev_epoch_loss * self.config.anneal_threshold: self.config.lr /= self.config.anneal_by print('annealed lr to %f' % self.config.lr) prev_epoch_loss = epoch_loss # save if model has improved on val if val_loss < best_val_loss: best_val_loss = val_loss best_val_epoch = epoch # if model has not improved for a while stop if epoch - best_val_epoch > self.config.early_stopping: stopped = epoch # break if verbose: sys.stdout.write('\r') sys.stdout.flush() print('\n\nstopped at %d\n' % stopped) return { 'loss_history': complete_loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def make_conf(self, labels, predictions): confmat = np.zeros([2, 2]) labels = [l if l != 4 else 1 for l in labels] predictions = [p if p != 4 else 1 for p in predictions] for l, p in zip(labels, predictions): confmat[l, p] += 1 return confmat def load_w2v(self): vocab = [] embd = [] e_dict = {} # change 100d to 50d for smaller-dimension GloVe embedding file = open("./glove.6B.100d.txt", 'r', encoding='UTF-8') for line in file.readlines(): row = line.strip().split(' ') vocab.append(row[0]) embd.append(row[1:]) e_dict[row[0]] = [float(i) for i in row[1:]] print("Loaded word2vec!") file.close() return vocab, embd, e_dict
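# ---------------------------------------------------------------------------
# Illustrative sketch (not from the original code): how load_data() above
# assembles the embedding matrix from the GloVe vectors returned by load_w2v().
# Words found in the pretrained dictionary keep their vector; out-of-vocabulary
# words get a small uniform random vector. `glove_dict` and the toy vocabulary
# below are hypothetical stand-ins, not the real data.
# ---------------------------------------------------------------------------
import numpy as np

def build_embedding_matrix(vocab_words, glove_dict, dim, seed=0):
    rng = np.random.default_rng(seed)
    rows = []
    for word in vocab_words:
        if word in glove_dict:
            rows.append(np.asarray(glove_dict[word], dtype=float))
        else:
            rows.append(rng.uniform(-0.2, 0.2, size=dim))  # random init for OOV words
    return np.vstack(rows)                                  # (vocab_size, dim)

# Toy usage:
# glove_dict = {"movie": [0.1, 0.2], "great": [0.3, -0.1]}  # pretend 2-d GloVe
# embed = build_embedding_matrix(["movie", "great", "zzzz"], glove_dict, dim=2)
# embed.shape   # (3, 2); the last row is the random out-of-vocabulary vector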
class RNN_Model(): def load_data(self): """Loads train/dev/test data and builds vocabulary.""" #todo =========> CREATE LOAD DATA <+============ self.train_data, self.dev_data, self.test_data = tr.simplified_data( 700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def inference(self, tree, predict_only_root=False): """For a given tree build the RNN models computation graph up to where it may be used for inference. Args: tree: a Tree object on which to build the computation graph for the RNN Returns: softmax_linear: Output tensor with the computed logits. """ node_tensors = self.add_model(tree.root) if predict_only_root: node_tensors = node_tensors[tree.root][0] else: node_tensors = [ tensor[0] for node, tensor in node_tensors.iteritems() if node.label != 2 ] node_tensors = tf.concat(0, node_tensors) return self.add_projections(node_tensors) def add_model_vars(self, keep_prob): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' with tf.variable_scope('Embeddings'): tf.get_variable('embeddings', [len(self.vocab), self.config.embed_size]) with tf.variable_scope('Composition'): # tf.get_variable('W1', # [2 * self.config.embed_size, self.config.embed_size]) # tf.get_variable('b1', [1, self.config.embed_size]) lstm_cell_forward = tf.nn.rnn_cell.LSTMCell(self.config.embed_size) lstm_cell_backward = tf.nn.rnn_cell.LSTMCell( self.config.embed_size) lstm_cell_forward = tf.nn.rnn_cell.DropoutWrapper( lstm_cell_forward, output_keep_prob=keep_prob) lstm_cell_backward = tf.nn.rnn_cell.DropoutWrapper( lstm_cell_backward, output_keep_prob=keep_prob) self.cell_forward = tf.nn.rnn_cell.MultiRNNCell( [lstm_cell_forward] * self.config.num_layers) self.cell_backward = tf.nn.rnn_cell.MultiRNNCell( [lstm_cell_backward] * self.config.num_layers) with tf.variable_scope('Projection'): tf.get_variable( 'U', [2 * self.config.embed_size, self.config.label_size]) tf.get_variable('bs', [1, self.config.label_size]) def embed_word(self, word): with tf.variable_scope('Embeddings', reuse=True): embeddings = tf.get_variable('embeddings') with tf.device('/cpu:0'): return tf.expand_dims( tf.nn.embedding_lookup(embeddings, self.vocab.encode(word)), 0) def add_model(self, node): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by it's parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). 
Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ # Dictionary of Node->(ouput, state) == osTuple node_tensors = OrderedDict() with tf.variable_scope('Composition', reuse=True): fw_cell = self.cell_forward bw_cell = self.cell_forward curr_node_osTuple = None if node.isLeaf: # curr_node_tensor = self.embed_word(node.word) out = self.embed_word(node.word) # curr_node_dep_emb = self.embed_word(node.dep) else: node_tensors.update(self.add_model(node.left)) node_tensors.update(self.add_model(node.right)) left_state = tf.zeros_like(node_tensors[node.left][1]) right_state = tf.zeros_like(node_tensors[node.right][1]) if not node.isLeaf: left_state = node_tensors[node.left][1] right_state = node_tensors[node.right][1] # inital_state = tf.concat(-1, [left_state, right_state]) # out_depfw, state_depfw = fw_cell(curr_node_dep_emb, left_state) out_wordfw, state_wordfw = fw_cell(right_state, left_state) # out_wordbw, state_wordbw = bw_cell(left_state, right_state) # out_depbw, state_depbw = bw_cell(curr_node_dep_emb, state_wordbw) # out = tf.concat(1, [out_wordfw, out_depbw]) # state = tf.concat(1, [state_wordfw, state_depbw]) out = tf.concat(1, [out_wordfw, out_wordbw]) state = tf.concat(1, [state_wordfw, state_wordbw]) osTuple = (out, state) # curr_node_tensor = prelu(tf.matmul(node_input, W1) + b1) node_tensors[node] = osTuple return node_tensors def add_projections(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ with tf.variable_scope('Projection', reuse=True): U = tf.get_variable('U') bs = tf.get_variable('bs') logits = tf.matmul(node_tensors, U) + bs return logits def loss(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ softmax_loss = tf.reduce_sum( tf.nn.sparse_softmax_cross_entropy_with_logits( logits, tf.constant(labels))) # with tf.variable_scope('Composition', reuse=True): # W1 = tf.get_variable('W1') with tf.variable_scope('Projection', reuse=True): U = tf.get_variable('U') return softmax_loss + self.config.l2 * tf.nn.l2_loss(U) def training(self, loss_tensor): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. """ return tf.train.GradientDescentOptimizer( self.config.lr).minimize(loss_tensor) def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?) """ return tf.argmax(y, 1) def __init__(self, config): self.config = config self.load_data() def predict(self, trees, weights_path, get_loss=False): """Make predictions from the provided model.""" results = [] losses = [] for i in xrange(int(math.ceil(len(trees) / float(RESET_AFTER)))): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars(1.) 
saver = tf.train.Saver() saver.restore(sess, weights_path) for tree in trees[i * RESET_AFTER:(i + 1) * RESET_AFTER]: logits = self.inference(tree, True) predictions = self.predictions(logits) root_prediction = sess.run(predictions)[0] if get_loss: root_label = tree.root.label loss = sess.run(self.loss(logits, [root_label])) losses.append(loss) results.append(root_prediction) return results, losses def run_epoch(self, new_model=False, verbose=True): step = 0 loss_history = [] random.shuffle(self.train_data) while step < len(self.train_data): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars(0.5) if new_model: init = tf.initialize_all_variables() sess.run(init) new_model = False else: saver = tf.train.Saver() saver.restore( sess, SAVE_DIR + '%s.temp' % self.config.model_name) for _ in xrange(RESET_AFTER): if step >= len(self.train_data): break tree = self.train_data[step] logits = self.inference(tree) labels = [l for l in tree.labels if l != 2] loss_tensor = self.loss(logits, labels) train_op = self.training(loss_tensor) loss_value, _ = sess.run([loss_tensor, train_op]) loss_history.append(loss_value) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() step += 1 saver = tf.train.Saver() if not os.path.exists(SAVE_DIR): os.makedirs(SAVE_DIR) saver.save(sess, SAVE_DIR + '%s.temp' % self.config.model_name) train_preds, _ = self.predict( self.train_data, SAVE_DIR + '%s.temp' % self.config.model_name) val_preds, val_losses = self.predict( self.dev_data, SAVE_DIR + '%s.temp' % self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print print 'Training acc (only root node): {}'.format(train_acc) print 'Valiation acc (only root node): {}'.format(val_acc) print self.make_conf(train_labels, train_preds) print self.make_conf(val_labels, val_preds) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, verbose=True): complete_loss_history = [] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') best_val_loss = float('inf') best_val_epoch = 0 stopped = -1 for epoch in xrange(self.config.max_epochs): print 'epoch %d' % epoch if epoch == 0: train_acc, val_acc, loss_history, val_loss = self.run_epoch( new_model=True) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch() complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) #lr annealing epoch_loss = np.mean(loss_history) if epoch_loss > prev_epoch_loss * self.config.anneal_threshold: self.config.lr /= self.config.anneal_by print 'annealed lr to %f' % self.config.lr prev_epoch_loss = epoch_loss #save if model has improved on val if val_loss < best_val_loss: shutil.copyfile(SAVE_DIR + '%s.temp' % self.config.model_name, SAVE_DIR + '%s' % self.config.model_name) best_val_loss = val_loss best_val_epoch = epoch # if model has not imprvoved for a while stop if epoch - best_val_epoch > self.config.early_stopping: stopped = epoch #break if verbose: sys.stdout.write('\r') sys.stdout.flush() print '\n\nstopped at %d\n' % stopped return { 'loss_history': complete_loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def make_conf(self, labels, predictions): confmat = np.zeros([2, 2]) for l, p in itertools.izip(labels, 
predictions): confmat[l, p] += 1 return confmat
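# Toy check (invented labels, illustrative only) of the 2x2 confusion matrix built by make_conf
# above: rows index the gold label, columns the prediction, and overall accuracy is the trace
# divided by the total count.
import numpy as np

def toy_conf_accuracy(model):
    conf = model.make_conf(labels=[0, 0, 1, 1], predictions=[0, 1, 1, 1])
    # expected layout: [[1., 1.], [0., 2.]] -> accuracy = 3 / 4 = 0.75
    return np.trace(conf) / np.sum(conf)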
class Model_RNN(LanguageModel): def load_data(self): pair_fname = '../lastfm_train_mappings.txt' lyrics_path = '../data/lyrics/train/' # X_train is a list of all examples. each examples is a 2-len list. each element is a list of words in lyrics. # word_counts is a dictionary that maps if self.config.debug: X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, '../glove.6B.50d.txt', threshold_down=0, threshold_up=float('inf'), npos=100, nneg=100) else: X_train, l_train, self.word_counts, seq_len1, seq_len2, self.config.max_steps = get_data(pair_fname, lyrics_path, threshold_down=100, threshold_up=4000, npos=10000, nneg=10000) self.labels_train = np.zeros((len(X_train),self.config.n_class)) self.labels_train[range(len(X_train)),l_train] = 1 x = collections.Counter(l_train) for k in x.keys(): print 'class:', k, x[k] print '' self.vocab = Vocab() self.vocab.construct(self.word_counts.keys()) self.wv = self.vocab.get_wv('../glove.6B.50d.txt') with open('word_hist.csv', 'w') as f: for w in self.word_counts.keys(): f.write(w+','+str(self.word_counts[w])+'\n') self.encoded_train_1 = np.zeros((len(X_train), self.config.max_steps)) # need to handle this better. self.encoded_train_2 = np.zeros((len(X_train), self.config.max_steps)) for i in range(len(X_train)): self.encoded_train_1[i,:len(X_train[i][0])] = [self.vocab.encode(word) for word in X_train[i][0]] self.encoded_train_2[i,:len(X_train[i][1])] = [self.vocab.encode(word) for word in X_train[i][1]] self.sequence_len1 = np.array(seq_len1) self.sequence_len2 = np.array(seq_len2) def add_placeholders(self): self.X1 = tf.placeholder(tf.int32, shape=(None, self.config.max_steps), name='X1') self.X2 = tf.placeholder(tf.int32, shape=(None, self.config.max_steps), name='X2') self.labels = tf.placeholder(tf.float32, shape=(None, self.config.n_class), name='labels') #self.initial_state = tf.placeholder(tf.float32, shape=(None, self.config.hidden_size), name='initial_state') self.seq_len1 = tf.placeholder(tf.int32, shape=(None), name='seq_len1') # for variable length sequences self.seq_len2 = tf.placeholder(tf.int32, shape=(None), name='seq_len2') # for variable length sequences def add_embedding(self): #L = tf.get_variable('L', shape=(len(self.vocab), self.config.embed_size), dtype=tf.float32) L = tf.Variable(tf.convert_to_tensor(self.wv, dtype=tf.float32), name='L') #L = tf.constant(tf.convert_to_tensor(self.wvi), dtype=tf.float32, name='L') inputs1 = tf.nn.embedding_lookup(L, self.X1) # self.X1 is batch_size x self.config.max_steps inputs2 = tf.nn.embedding_lookup(L, self.X2) # input2 is batch_size x self.config.max_steps x self.config.embed_size inputs1 = tf.split(1, self.config.max_steps, inputs1) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size inputs1 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs1] inputs2 = tf.split(1, self.config.max_steps, inputs2) # list of len self.config.max_steps where each element is batch_size x self.config.embed_size inputs2 = [tf.squeeze(x, squeeze_dims=[1]) for x in inputs2] return inputs1, inputs2 def add_model_rnn(self, inputs1, inputs2, seq_len1, seq_len2): #self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32) self.initial_state = tf.constant(np.zeros((self.config.batch_size,self.config.hidden_size)), dtype=tf.float32) rnn_outputs = [] rnn_outputs1 = [] rnn_outputs2 = [] h_curr1 = self.initial_state h_curr2 = self.initial_state with tf.variable_scope('rnn'): Whh = tf.get_variable('Whh', 
                                  shape=(self.config.hidden_size, self.config.hidden_size),
                                  dtype=tf.float32)
            Wxh = tf.get_variable('Wxh',
                                  shape=(self.config.embed_size, self.config.hidden_size),
                                  dtype=tf.float32)
            # bias is added to a (batch, hidden_size) pre-activation, so it must be (hidden_size,)
            b1 = tf.get_variable('bhx', shape=(self.config.hidden_size,), dtype=tf.float32)
            for i in range(self.config.max_steps):
                if self.config.batch_size == 1:
                    if i == seq_len1[0]:
                        break
                h_curr1 = tf.sigmoid(tf.matmul(h_curr1, Whh) + tf.matmul(inputs1[i], Wxh) + b1)
                rnn_outputs1.append(h_curr1)
            for i in range(self.config.max_steps):
                if self.config.batch_size == 1:
                    if i == seq_len2[0]:
                        break
                h_curr2 = tf.sigmoid(tf.matmul(h_curr2, Whh) + tf.matmul(inputs2[i], Wxh) + b1)
                rnn_outputs2.append(h_curr2)
        # lstm_states = [tf.concat(1, [rnn_outputs1[i], rnn_outputs2[i]]) for i in range(self.config.max_steps)]
        rnn_final_states = tf.concat(1, [rnn_outputs1[-1], rnn_outputs2[-1]])
        return rnn_final_states

    def add_model_lstm(self, inputs1, inputs2, seq_len1, seq_len2):
        # self.initial_state = tf.constant(np.zeros(()), dtype=tf.float32)
        self.initial_state = tf.constant(np.zeros((self.config.batch_size, self.config.hidden_size)), dtype=tf.float32)
        lstm_outputs1 = []
        lstm_outputs2 = []
        h_curr1 = self.initial_state
        h_curr2 = self.initial_state
        cell1 = self.initial_state
        cell2 = self.initial_state
        with tf.variable_scope('lstm'):
            Whc = tf.get_variable('Whh', shape=(self.config.hidden_size, 4 * self.config.hidden_size),
                                  dtype=tf.float32, initializer=tf.random_normal_initializer())
            Wxc = tf.get_variable('Wxh', shape=(self.config.embed_size, 4 * self.config.hidden_size),
                                  dtype=tf.float32, initializer=tf.random_normal_initializer())
            b1 = tf.get_variable('bhx', shape=(self.config.hidden_size,), dtype=tf.float32,
                                 initializer=tf.random_normal_initializer())
            for i in range(self.config.max_steps):
                if self.config.batch_size == 1:
                    if i == seq_len1[0]:
                        break
                ifog1 = tf.matmul(h_curr1, Whc) + tf.matmul(inputs1[i], Wxc)
                i1, f1, o1, g1 = tf.split(1, 4, ifog1)
                i1 = tf.sigmoid(i1)
                f1 = tf.sigmoid(f1)
                o1 = tf.sigmoid(o1)
                g1 = tf.tanh(g1)
                cell1 = f1 * cell1 + i1 * g1
                h_curr1 = o1 * tf.tanh(cell1)
                lstm_outputs1.append(h_curr1)
            for i in range(self.config.max_steps):
                if self.config.batch_size == 1:
                    if i == seq_len2[0]:
                        break
                ifog2 = tf.matmul(h_curr2, Whc) + tf.matmul(inputs2[i], Wxc)
                i2, f2, o2, g2 = tf.split(1, 4, ifog2)
                i2 = tf.sigmoid(i2)
                f2 = tf.sigmoid(f2)
                o2 = tf.sigmoid(o2)
                g2 = tf.tanh(g2)
                cell2 = f2 * cell2 + i2 * g2
                h_curr2 = o2 * tf.tanh(cell2)
                lstm_outputs2.append(h_curr2)
        lstm_final_states = tf.concat(1, [lstm_outputs1[-1], lstm_outputs2[-1]])
        return lstm_final_states

    def add_final_projections(self, rnn_final_states):
        # rnn_final_states has shape (batch_size, 2 * hidden_size): the concatenated final hidden states of the two sequences.
Whu = tf.get_variable('Whu', shape=(2*self.config.hidden_size,self.config.n_class), initializer=tf.random_normal_initializer()) bhu = tf.get_variable('bhu', shape=(self.config.n_class,), initializer=tf.random_normal_initializer()) final_projections = tf.matmul(rnn_final_states,Whu) + bhu # in case we stop short sequences, the rnn_state in further time_steps should be unch return final_projections def add_loss_op(self, y): loss = tf.nn.softmax_cross_entropy_with_logits(y, self.labels) loss = tf.reduce_mean(loss) return loss def add_training_op(self, loss): #train_op = tf.train.AdamOptimizer(learning_rate=self.config.lr).minimize(loss) train_op = tf.train.GradientDescentOptimizer(learning_rate=self.config.lr).minimize(loss) return train_op def __init__(self, config): self.config = config self.load_data() self.add_placeholders() self.inputs1, self.inputs2 = self.add_embedding() if self.config.model=='rnn': self.final_hidden_states = self.add_model_rnn(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2) elif self.config.model=='lstm': self.final_hidden_states = self.add_model_lstm(self.inputs1, self.inputs2, self.seq_len1, self.seq_len2) self.final_projections = self.add_final_projections(self.final_hidden_states) self.loss = self.add_loss_op(self.final_projections) self.train_step = self.add_training_op(self.loss) self.predictions = tf.argmax(tf.nn.softmax(self.final_projections),1) self.correct_predictions = tf.equal(self.predictions,tf.argmax(self.labels,1)) self.correct_predictions = tf.reduce_sum(tf.cast(self.correct_predictions, 'int32')) def run_epoch(self, session, X1, X2, labels, sequence_len1, sequence_len2, train_op, verbose=10): # X and y are 2D np arrays config = self.config #state = tf.zeros([self.config.batch_size, self.config.hidden_size]) state = self.initial_state.eval() data_len = np.shape(X1)[0] index = np.arange(data_len) np.random.shuffle(index) n_batches = data_len // self.config.batch_size loss = 0.0 total_loss = [] total_correct = 0 all_preds = -np.ones((data_len,)) for batch_num in range(n_batches): x1_batch = X1[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] x2_batch = X2[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] labels_batch = labels[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size], :] seq_len_batch1 = sequence_len1[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]] seq_len_batch2 = sequence_len2[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]] feed_dict = {self.X1: x1_batch, self.X2: x2_batch, self.labels: labels_batch, self.seq_len1: seq_len_batch1, self.seq_len2: seq_len_batch2} #self.initial_state: state} loss, preds, correct, final_projections, _ = session.run([self.loss, self.predictions, self.correct_predictions, self.final_projections, train_op], feed_dict=feed_dict) #print str(batch_num)+'/'+str(n_batches)+' : '+str(final_projections[0][0])+' '+str(final_projections[0][1]) total_loss.append(loss) total_correct += correct all_preds[index[batch_num * self.config.batch_size : (batch_num+1) * self.config.batch_size]] = preds if verbose and (batch_num+1)%verbose==0: sys.stdout.write('\r{} / {} : loss = {:.4f} : train_acc = {:.2f}%'.format(batch_num+1, n_batches, np.mean(total_loss), 100.0*total_correct/((batch_num+1)*self.config.batch_size))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.mean(total_loss), all_preds
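# The add_model_lstm method above computes all four gates with a single matmul and then splits the
# result. A minimal NumPy sketch of that one step (invented shapes, bias omitted as in the code
# above), purely to make the gate equations explicit; it is not part of the model.
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, h_prev, c_prev, Wxc, Whc):
    """One i/f/o/g step matching the split order of tf.split(1, 4, ifog)."""
    ifog = h_prev @ Whc + x @ Wxc              # (batch, 4 * hidden)
    i, f, o, g = np.split(ifog, 4, axis=1)
    c = sigmoid(f) * c_prev + sigmoid(i) * np.tanh(g)
    h = sigmoid(o) * np.tanh(c)
    return h, c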
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) # matrix_length, 二维词表长度。 self.matrix_length = math.ceil(math.sqrt(len(self.vocab))) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible """ self.input_placeholder = tf.placeholder(dtype=tf.int32, shape=(None, self.config.num_steps), name='input') self.labels_placeholder = tf.placeholder(dtype=tf.int32, shape=(None, self.config.num_steps), name='label') self.dropout_placeholder = tf.placeholder(dtype=tf.float32, name='dropout') def add_embedding(self, name='embed'): """Add embedding layer.""" with tf.device('/gpu:0'): with tf.name_scope(name): # _______________________base_model_without_devided______________________________________# # embedding = tf.get_variable('Embedding', [len(self.vocab), # self.config.embed_size], # # trainable=True, # ) # inputs = test.embedding_lookup(embedding, self.input_placeholder) # inputs = [tf.squeeze(x, [1]) # for x in tf.split(inputs, self.config.num_steps, 1)] # return inputs #___________________________________END___________________________________________# #_______________divided row and column, combine them and integred___________________# # embedding_r = tf.get_variable('Eb_r', [self.matrix_length, # self.config.embed_size]) # embedding_c = tf.get_variable('Eb_c', [self.matrix_length, # self.config.embed_size]) # inputs_r = tf.nn.embedding_lookup(embedding_r, # self.input_placeholder // self.matrix_length, name='inputs_r') # inputs_c = tf.nn.embedding_lookup(embedding_c, # self.input_placeholder % self.matrix_length, name='inputs_c') # inputs = tf.add(inputs_r, inputs_c, name='inputs') # inputs = [tf.squeeze(x, [1]) # for x in tf.split(inputs, self.config.num_steps, 1)] # return inputs #____________________________END____________________________________# #___________divided row and column, apply the next word's row______________________________________# embedding_r = tf.get_variable( 'Eb_r', [self.matrix_length, self.config.embed_size]) embedding_c = tf.get_variable( 'Eb_c', [self.matrix_length, self.config.embed_size]) self.input_row_indice = self.input_placeholder // self.matrix_length self.input_column_indice = self.input_placeholder % self.matrix_length inputs_r = tf.nn.embedding_lookup(embedding_r, self.input_row_indice, name='inputs_r') inputs_c = tf.nn.embedding_lookup(embedding_c, self.input_column_indice, name='inputs_c') inputs_r_next = tf.nn.embedding_lookup( embedding_r, self.labels_placeholder // self.matrix_length, name='inputs_r_next') inputs = tf.concat([inputs_r, inputs_c, inputs_r_next], 2, name='combine_r_and_c') inputs = [ tf.squeeze(x, [1]) for x in tf.split(inputs, self.config.num_steps, 1) ] return inputs 
        #____________________________END____________________________________#
        # inputs = tf.nn.embedding_lookup(embedding, self.input_placeholder)
        # input_placeholder is a 2-D placeholder of shape [batch_size, num_steps]
        # inputs is then a 3-D tensor of shape [batch_size, num_steps, embed_size]
        # after tf.split, each piece is still a 3-D tensor of shape [batch_size, 1, embed_size]
        # after tf.squeeze, each piece becomes a 2-D tensor of shape [batch_size, embed_size]
        # the return value is a list whose elements are the 2-D tensors described above

    def add_model_RNN(self, inputs):
        self.initial_state = tf.zeros(
            [self.config.batch_size, self.config.hidden_size])
        lstm_cell = rnn.BasicRNNCell(self.config.lstm_size)
        cell = rnn.DropoutWrapper(lstm_cell,
                                  output_keep_prob=self.config.dropout)
        self._initial_state = cell.zero_state(
            batch_size=self.config.batch_size, dtype=tf.float32)
        state = self._initial_state
        rnn_outputs = []
        with tf.variable_scope('RNN') as scope:
            for tstep, current_input in enumerate(inputs):
                if tstep > 0:
                    scope.reuse_variables()
                output, state = cell(current_input, state)
                rnn_outputs.append(state)
            self.final_state = rnn_outputs[-1]
        return rnn_outputs

    def add_model_LSTM(self, inputs):
        self.initial_state = tf.zeros(
            [self.config.batch_size, self.config.hidden_size])
        lstm_cell = rnn.BasicLSTMCell(self.config.lstm_size)
        cell = rnn.DropoutWrapper(lstm_cell,
                                  output_keep_prob=self.config.dropout)
        self._initial_state = cell.zero_state(
            batch_size=self.config.batch_size, dtype=tf.float32)
        state = self._initial_state
        rnn_outputs = []
        with tf.variable_scope('RNN') as scope:
            for tstep, current_input in enumerate(inputs):
                if tstep > 0:
                    scope.reuse_variables()
                output, state = cell(current_input, state)
                rnn_outputs.append(state)
            self.final_state = rnn_outputs[-1]
        return rnn_outputs

    def add_model(self, inputs):
        """Creates the RNN LM model.

        In the space provided below, you need to implement the equations for the
        RNNLM model. Note that you may NOT use built in rnn_cell functions from
        tensorflow.

        H: (hidden_size, hidden_size)
        I: (embed_size, hidden_size)
        b_1: (hidden_size,)

        Args:
            inputs: List of length num_steps, each of whose elements should be
                    a tensor of shape (batch_size, embed_size).
Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ #__________________________________________baseline______________________________________________________# # with tf.variable_scope('InputDropout'): # inputs = [tf.nn.dropout(x, self.dropout_placeholder) for x in # inputs] # # with tf.variable_scope('RNN') as scope: # self.initial_state = tf.zeros([self.config.batch_size, # self.config.hidden_size]) # state = self.initial_state # rnn_outputs = [] # for tstep, current_input in enumerate(inputs): # if tstep > 0: # scope.reuse_variables() # RNN_H = tf.get_variable('RNN_H_matrix', # [self.config.hidden_size, # self.config.hidden_size]) # RNN_I = tf.get_variable('RNN_I_matrix', # [2 * self.config.embed_size, # self.config.hidden_size]) # RNN_b = tf.get_variable('RNN_b_bias', # [self.config.hidden_size]) # state = tf.nn.sigmoid(tf.matmul(state, RNN_H) + tf.matmul( # current_input, RNN_I) + RNN_b) # tf.summary.histogram('hidden_weights', RNN_H) # rnn_outputs.append(state) # self.final_state = rnn_outputs[-1] # with tf.variable_scope('RNN_dropout'): # rnn_outputs = [tf.nn.dropout(x, self.dropout_placeholder) for x in # rnn_outputs] # return rnn_outputs #______________________________________END_____________________________________________________________# #________________________________________row and column, apply next word row____________________________# with tf.variable_scope('InputDropout'): inputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in inputs ] with tf.variable_scope('RNN') as scope: self.initial_state = tf.zeros( [self.config.batch_size, self.config.hidden_size]) state = self.initial_state rnn_outputs = [] for tstep, current_input in enumerate(inputs): input_r, input_c, input_r_next = tf.split(current_input, 3, 1) if tstep > 0: scope.reuse_variables() RNN_H = tf.get_variable( 'RNN_H_matrix', [self.config.hidden_size, self.config.hidden_size]) RNN_I_r = tf.get_variable( 'RNN_I_r_matrix', [self.config.embed_size, self.config.hidden_size]) RNN_I_c = tf.get_variable( 'RNN_I_c_matrix', [self.config.embed_size, self.config.hidden_size]) RNN_b_r = tf.get_variable('RNN_b_r_bias', [self.config.hidden_size]) RNN_b_c = tf.get_variable('RNN_b_c_bias', [self.config.hidden_size]) if tstep == 0: state = tf.nn.sigmoid( tf.matmul(state, RNN_H) + tf.matmul(input_r, RNN_I_r) + RNN_b_r) state = tf.nn.sigmoid( tf.matmul(state, RNN_H) + tf.matmul(input_c, RNN_I_c) + RNN_b_c) rnn_outputs.append(state) state = tf.nn.sigmoid( tf.matmul(state, RNN_H) + tf.matmul(input_r_next, RNN_I_r) + RNN_b_r) rnn_outputs.append(state) else: state = tf.nn.sigmoid( tf.matmul(state, RNN_H) + tf.matmul(input_c, RNN_I_c) + RNN_b_c) rnn_outputs.append(state) state = tf.nn.sigmoid( tf.matmul(state, RNN_H) + tf.matmul(input_r_next, RNN_I_r) + RNN_b_r) rnn_outputs.append(state) tf.summary.histogram('hidden_weights', RNN_H) tf.summary.histogram('hidden_state', state) self.final_state = rnn_outputs[-1] with tf.variable_scope('RNN_dropout'): rnn_outputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in rnn_outputs ] return rnn_outputs #___________________________________END_______________________________________________________# def add_projection(self, rnn_outputs, name='project'): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). 
Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ #_______________________________baseline____________________________________________________# # with tf.name_scope(name): # U = tf.get_variable('U', [self.config.hidden_size, len(self.vocab)]) # b_2 = tf.get_variable('b_2', (len(self.vocab),)) # outputs = [tf.matmul(o, U) + b_2 for o in rnn_outputs] # # return outputs #__________________________________END__________________________________________________________# # _______________________________use row and column seperatly to predict_____________________________________________# with tf.name_scope(name): U_r = tf.get_variable( 'U_r', [self.config.hidden_size, self.matrix_length]) b_2_r = tf.get_variable('b_2_r', (self.matrix_length, )) U_c = tf.get_variable( 'U_c', [self.config.hidden_size, self.matrix_length]) b_2_c = tf.get_variable('b_2_c', (self.matrix_length, )) # tf.histogram_ assert len(rnn_outputs) % 2 == 0 rnn_outputs_rows = rnn_outputs[::2] rnn_outputs_columns = rnn_outputs[1::2] outputs_r = [tf.matmul(i, U_r) + b_2_r for i in rnn_outputs_rows] outputs_c = [ tf.matmul(j, U_c) + b_2_c for j in rnn_outputs_columns ] #此时output_r,c,都是一个list,每个元素为(batch_size,matrix_length),代表batch个预测和每个预测对应行/列的可能性。 return outputs_r, outputs_c # outputs = [] # for i in range(len(rnn_outputs)/2): # outputs.append(outputs_r[i]) # outputs.append(outputs_c[i]) # return outputs #返回的outputs是个每个元素为(batch_size, matrix_length * 2)的list # _______________________________END____________________________________________________# def add_loss_op(self, output_r, output_c): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ #____________________________________base____________________________________# # all_ones = [tf.ones([self.config.num_steps * self.config.batch_size])] # cross_entropy_loss = tf.contrib.seq2seq(logits=[output], # targets=[tf.reshape( # tensor=self.labels_placeholder, # shape=[-1])], # weights=all_ones) # tf.add_to_collection('total_loss', cross_entropy_loss) # loss = tf.add_n(tf.get_collection('total_loss')) # self.loss_summary = tf.summary.scalar('total_loss', loss) # return loss #________________________________________END______________________________________# # ____________________rewrite for row and column loss____________________________________# all_ones = [tf.ones([self.config.num_steps * self.config.batch_size])] cross_entropy_row_loss = sequence_loss( logits=[output_r], targets=[ tf.reshape( tensor=self.labels_placeholder // self.matrix_length, shape=[-1], ) ], weights=all_ones) cross_entropy_column_loss = sequence_loss( logits=[output_c], targets=[ tf.reshape( tensor=self.labels_placeholder % self.matrix_length, shape=[-1], ) ], weights=all_ones) # self.print = tf.Print(_, [_], summarize=10) tf.add_to_collection('total_loss', cross_entropy_row_loss) tf.add_to_collection('total_loss', cross_entropy_column_loss) loss = tf.add_n(tf.get_collection('total_loss')) self.loss_summary = tf.summary.scalar('total_loss', loss) return loss # ____________________________________END_____________________________________# def add_training_op(self, loss): """Sets up the training Ops. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. 
""" with tf.variable_scope(tf.get_variable_scope()) as vscope: optimzer = tf.train.AdamOptimizer(self.config.lr) # optimzer.minimize函数功能: # 计算loss对各个变量(tf.variables)的梯度, 并更新参数 train_op = optimzer.minimize(loss) tf.get_variable_scope().reuse_variables() return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) # self.rnn_outputs = self.add_model_RNN(self.inputs) self.outputs_r, self.outputs_c = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions_r = [ tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs_r ] self.predictions_c = [ tf.nn.softmax(tf.cast(p, 'float64')) for p in self.outputs_c ] self.predictions_r = tf.reshape(tf.concat(self.predictions_r, 0), [1, -1]) # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide output_r = tf.reshape(tf.concat(self.outputs_r, 1), [-1, self.matrix_length]) output_c = tf.reshape(tf.concat(self.outputs_c, 1), [-1, self.matrix_length]) # output = tf.reshape(tf.concat(self.outputs, 1), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_op(output_r, output_c) self.train_step = self.add_training_op(self.calculate_loss) self.total_train_step = 0 def run_epoch(self, session, data, train_op=None, verbose=10, writer=None): config = self.config dp = config.dropout is_training = 1 if not train_op: train_op = tf.no_op() dp = 1 is_training = 0 total_steps = sum( 1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() merged_summary = tf.summary.merge_all() # merged_summary = tf.summary.merge([self.loss_summary]) for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): if is_training == 1: self.total_train_step += 1 # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = { self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp } if step % 5 == 0: loss, state, _, summary_str = session.run([ self.calculate_loss, self.final_state, train_op, merged_summary ], feed_dict=feed) writer.add_summary(summary_str, self.total_train_step) else: loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
print("{:<10} {:^10}".format("Class", "Accuracy (%)")) for i in range(n_classes): print('{:<10} {:^10.2f}'.format( classes[i], 100 * class_correct[i] / class_total[i])) return confusion_matrix X_train, y_train = load_data('../../data/train.csv') X_valid, y_valid = load_data('../../data/valid.csv', test=True) train_tokens = pd.Series(X_train).apply(clean_text_simple) train_data = tr.loadTrees(train_tokens, y_train) train_sents = [tree.get_words() for tree in train_data] vocab = Vocab() vocab.construct(list(itertools.chain.from_iterable(train_sents))) test_net = RNN_Model(vocab, embed_size=300) for model in range(29, 30): model_path = "poids_entrainement/recursiveNN_model_epoch{}.pth".format( model) test_net.load_state_dict(torch.load(model_path)) y_pred_train = predict(X_train, test_net) print("Train: iter", model, accuracy_score(y_train, y_pred_train)) classes = ('negative', 'neutral', 'positive') confusion_matrix_train = accuracy_per_class(y_train, y_pred_train, classes) # Plot normalized confusion matrix plot_confusion_matrix(confusion_matrix_train,
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible. Adds following nodes to the computational graph. input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout rate placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ self.input_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps)) self.labels_placeholder = tf.placeholder(tf.float32, shape=(None, self.config.num_steps)) self.dropout_placeholder = tf.placeholder(tf.float32, shape=()) def add_embedding(self): """Add embedding layer. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs. Hint: Here is the dimension of the variables (embedding matrix) you will need to create: embedding: (len(self.vocab), embed_size) corresponding to L in HW4. Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ embedding = tf.get_variable("embedding", [len(self.vocab), self.config.embed_size]) embedded = tf.nn.embedding_lookup(params=embedding, ids=self.input_placeholder) inputs = tf.transpose(embedded, perm=[1, 0, 2]) inputs = tf.split(inputs, self.config.num_steps, axis=0) # splits inputs into "num_steps" sub-tensors for i in range(self.config.num_steps): inputs[i] = tf.squeeze(inputs[i], [0]) return inputs def add_model(self, inputs): """Creates the RNN language model. Implement the equations for the RNN language model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Make sure to apply dropout to both the inputs and the outputs. How to do it for inputs has been provided. Hint: Use variable_scope to make it clear for each layer. (Names already given. You can change the given code but please keep the name.) Hint: Use the variable scope "RNN" to define RNN variables to enable weight sharing. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. You might find tf.zeros useful. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: To implement RNN, you need to perform an explicit for-loop over inputs. Read the documentation of tf.variable_scope to see how to achieve weight sharing. 
Hint: Here are the dimensions of the various variables you will need to create: RNN_H: (hidden_size, hidden_size) corresponding to H in HW4. RNN_I: (embed_size, hidden_size) corresponding to I in HW4. RNN_b: (hidden_size,) corresponding to b1 in HW4. (Don't change variable name) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ with tf.variable_scope('InputDropout'): inputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in inputs ] with tf.variable_scope('RNN', reuse=tf.AUTO_REUSE) as scope: # weight sharing RNN_H = tf.get_variable( shape=[self.config.hidden_size, self.config.hidden_size], dtype=tf.float32, name="RNN_H") RNN_I = tf.get_variable( shape=[self.config.embed_size, self.config.hidden_size], dtype=tf.float32, name="RNN_I") RNN_b = tf.get_variable(shape=[ self.config.hidden_size, ], dtype=tf.float32, name="RNN_b") self.initial_state = tf.zeros( shape=[self.config.batch_size, self.config.hidden_size], dtype=tf.float32) self.final_state = self.initial_state out = [] for input in inputs: self.final_state = tf.sigmoid( tf.matmul(self.final_state, RNN_H) + tf.matmul(input, RNN_I) + RNN_b) out.append(self.final_state) with tf.variable_scope('RNNDropout'): rnn_outputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in out ] return rnn_outputs def add_projection(self, rnn_outputs): """Adds a projection/output layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Use variable_scope to make it clear for each layer. (Names already given. You can change the given code but please keep the name.) Hint: Here are the dimensions of the variables you will need to create W: (hidden_size, len(vocab)) corresponding to U in HW4. b: (len(vocab),) corresponding to b2 in HW4. (Don't change variable name) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size). Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab)) """ with tf.variable_scope('Projection', reuse=tf.AUTO_REUSE): # weight sharing W = tf.get_variable( shape=[self.config.hidden_size, len(self.vocab)], dtype=tf.float32, name="W") b = tf.get_variable(shape=[ len(self.vocab), ], dtype=tf.float32, name="b") outputs = [] for rnn_output in rnn_outputs: outputs.append( tf.matmul(rnn_output, W) + b) #Note: softmax is already applied in starter code below return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.contrib.legacy_seq2seq.sequence_loss to implement sequence loss. -----------------------------Info for tensorflow.contrib.legacy_seq2seq.sequence_loss---------------- def sequence_loss(logits, targets, weights, average_across_timesteps=True, average_across_batch=True, softmax_loss_function=None, name=None): Weighted cross-entropy loss for a sequence of logits, batch-collapsed. Args: logits: List of 2D Tensors of shape [batch_size x num_decoder_symbols]. targets: List of 1D batch-sized int32 Tensors of the same length as logits. weights: List of 1D batch-sized float-Tensors of the same length as logits. average_across_timesteps: If set, divide the returned cost by the total label weight. average_across_batch: If set, divide the returned cost by the batch size. 
softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch to be used instead of the standard softmax (the default if this is None). name: Optional name for this operation, defaults to "sequence_loss". Returns: A scalar float Tensor: The average log-perplexity per symbol (weighted). ---------------------------------------------------------------------------------------------------- Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ # compute logits logits = output # size: [None, len(self.vocab)] = [640, 10000] logits = tf.reshape( logits, [self.config.batch_size, self.config.num_steps, len(self.vocab)]) # compute targets targets = self.labels_placeholder targets = tf.reshape(targets, [self.config.batch_size, self.config.num_steps]) targets = tf.cast(targets, dtype=tf.int32) # compute weights weights = tf.ones([self.config.batch_size, self.config.num_steps], dtype=tf.float32) # compute loss function loss = tf.contrib.seq2seq.sequence_loss(logits=logits, targets=targets, weights=weights) return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ optimizer = tf.train.AdamOptimizer(learning_rate=self.config.lr) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) with tf.control_dependencies(update_ops): train_op = optimizer.minimize(loss) return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [ tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs ] # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide output = tf.reshape( tf.concat(self.outputs, axis=1), [-1, len(self.vocab)]) # [10, 64, 10000] ==> [640, 10000] self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss) def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum( 1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = { self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp } loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
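# run_epoch above reports np.exp(np.mean(total_loss)), i.e. perplexity as the exponential of the
# mean per-step cross-entropy. A two-line numeric illustration with made-up loss values, not taken
# from any actual run.
import numpy as np

step_losses = [6.2, 5.8, 5.5]               # hypothetical per-batch cross-entropies (nats)
perplexity = np.exp(np.mean(step_losses))   # approximately 341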
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder(dtype=tf.int32, shape=(None, self.config.num_steps)) self.labels_placeholder = tf.placeholder(dtype=tf.float32, shape=(None, self.config.num_steps)) self.dropout_placeholder = tf.placeholder(dtype=tf.float32, shape=()) ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE all_embeddings = tf.get_variable( name="lookup_table", shape=[len(self.vocab), self.config.embed_size]) input_embeddings = tf.nn.embedding_lookup( params=all_embeddings, ids=self.input_placeholder) # tf.split: Splits a tensor into sub tensors. # If num_or_size_splits is a scalar, num_split, then splits value along dimension axis into # num_split smaller tensors. embeddings_list = tf.split( value=input_embeddings, num_or_size_splits=self.config.num_steps, axis=1) # tf.squeeze - Removes dimensions of size 1 from the shape of a tensor. # resulted inputs will have dimensions (batch_size, embed_size) inputs = [ tf.squeeze(item, squeeze_dims=(1, )) for item in embeddings_list ] ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. 
Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE with tf.variable_scope("projection"): U = tf.get_variable( name="U", shape=[self.config.hidden_size, len(self.vocab)]) b2 = tf.get_variable(name="b2", shape=[len(self.vocab)], initializer=tf.constant_initializer(0.0)) outputs = [] for rnn_step in rnn_outputs: temp = tf.matmul(rnn_step, U) + b2 outputs.append(temp) ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE # weights A 2D Tensor of shape [batch_size x sequence_length] and dtype float. # Weights constitutes the weighting of each prediction in the sequence. # When using weights as masking set all valid timesteps to 1 and all padded timesteps to 0. all_ones_weight = tf.ones( shape=[self.config.batch_size, self.config.num_steps]) # tf.reshape(self.labels_placeholder,[self.config.batch_size * self.config.num_steps, -1]) reshape_labels = tf.cast(self.labels_placeholder, tf.int32) output_tensor = tf.convert_to_tensor(output) # Weighted cross-entropy loss for a sequence of logits (per example). # https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/sequence_loss loss = tf.contrib.seq2seq.sequence_loss(logits=output_tensor, targets=reshape_labels, weights=all_ones_weight) ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE with tf.variable_scope('adam'): train_op = tf.train.AdamOptimizer(self.config.lr).minimize(loss) ### END YOUR CODE return train_op def __init__(self, config): self.config = config # load train/validation/test data from disk into memory self.load_data(debug=False) # defines data structures self.add_placeholders() # layer to retrieve word embeddings self.inputs = self.add_embedding() # constructs RNN Language model self.rnn_outputs = self.add_model(self.inputs) # The projection layer transforms the hidden representation # to a distribution over the vocabulary self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [ tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs ] # Adds loss ops to the computational graph self.calculate_loss = self.add_loss_op(self.outputs) # Sets up the training loss to the computation graph configuration self.train_step = self.add_training_op(self.calculate_loss) def add_model(self, inputs): """Creates the RNN LM model. 
In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE # hidden state weight matrix that converts hidden state self.H = tf.get_variable( "H", shape=[self.config.hidden_size, self.config.hidden_size]) self.I = tf.get_variable( "I", shape=[self.config.embed_size, self.config.hidden_size]) self.b_1 = tf.get_variable("b_1", shape=[self.config.hidden_size]) # initial hidden state is zero self.initial_state = tf.zeros( (self.config.batch_size, self.config.hidden_size)) state = self.initial_state rnn_outputs = [] # inference over num_steps of the sequence for time_step in xrange(self.config.num_steps): state = tf.nn.sigmoid( tf.matmul(state, self.H) + tf.matmul(inputs[time_step], self.I) + self.b_1) rnn_outputs.append(state) self.final_state = state ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum( 1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = { self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp } loss, state, _ = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
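# add_model above implements the recurrence h_t = sigmoid(h_{t-1} H + x_t I + b_1) with an explicit
# loop. A minimal NumPy sketch of one such step with invented shapes, shown only to state the
# equation outside the TensorFlow graph.
import numpy as np

def rnn_step(h_prev, x, H, I, b_1):
    """h_prev: (batch, hidden), x: (batch, embed) -> (batch, hidden)."""
    z = h_prev @ H + x @ I + b_1
    return 1.0 / (1.0 + np.exp(-z))   # elementwise sigmoid

h = np.zeros((64, 100))                                            # initial state
x = np.random.randn(64, 50)                                        # one step of embeddings
H, I, b_1 = np.random.randn(100, 100), np.random.randn(50, 100), np.zeros(100)
h = rnn_step(h, x, H, I, b_1)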
class RNNLM_Model(LanguageModel):
    def load_data(self, debug=False):
        self.vocab = Vocab()
        self.vocab.construct(get_ptb_dataset('train'))
        self.encoded_train = np.array(
            [self.vocab.encode(word) for word in get_ptb_dataset('train')],
            dtype=np.int32)
        self.encoded_valid = np.array(
            [self.vocab.encode(word) for word in get_ptb_dataset('valid')],
            dtype=np.int32)
        self.encoded_test = np.array(
            [self.vocab.encode(word) for word in get_ptb_dataset('test')],
            dtype=np.int32)
        if debug:
            num_debug = 1024
            self.encoded_train = self.encoded_train[:num_debug]
            self.encoded_valid = self.encoded_valid[:num_debug]
            self.encoded_test = self.encoded_test[:num_debug]

    def add_placeholders(self):
        self.input_placeholder = tf.placeholder(
            tf.int32, shape=[None, self.config.num_steps], name='Input')
        self.labels_placeholder = tf.placeholder(
            tf.int32, shape=[None, self.config.num_steps], name='Target')
        self.dropout_placeholder = tf.placeholder(tf.float32, name='Dropout')

    def add_embedding(self):
        with tf.device('/cpu:0'):
            embedding = tf.get_variable(
                'Embedding', [len(self.vocab), self.config.embed_size],
                trainable=True)
            inputs = tf.nn.embedding_lookup(embedding, self.input_placeholder)
            inputs = [tf.squeeze(x, [1])
                      for x in tf.split(1, self.config.num_steps, inputs)]
            return inputs

    def add_projection(self, rnn_outputs):
        with tf.variable_scope('Projection'):
            U = tf.get_variable('Matrix',
                                [self.config.hidden_size, len(self.vocab)])
            proj_b = tf.get_variable('Bias', [len(self.vocab)])
            outputs = [tf.matmul(o, U) + proj_b for o in rnn_outputs]
        return outputs

    def add_loss_op(self, output):
        add_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
        cross_entropy = sequence_loss([output],
                                      [tf.reshape(self.labels_placeholder, [-1])],
                                      add_ones, len(self.vocab))
        tf.add_to_collection('total_loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total_loss'))
        return loss

    def add_training_op(self, loss):
        optimizer = tf.train.AdamOptimizer(self.config.lr)
        train_op = optimizer.minimize(loss)
        return train_op

    def __init__(self, config):
        self.config = config
        self.load_data(debug=False)
        self.add_placeholders()
        self.inputs = self.add_embedding()
        self.rnn_outputs = self.add_model(self.inputs)
        self.outputs = self.add_projection(self.rnn_outputs)
        self.predictions = [tf.nn.softmax(tf.cast(o, 'float64'))
                            for o in self.outputs]
        output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)])
        self.calculate_loss = self.add_loss_op(output)
        self.train_step = self.add_training_op(self.calculate_loss)

    def add_model(self, inputs):
        with tf.variable_scope('InputDropout'):
            inputs = [tf.nn.dropout(x, self.dropout_placeholder) for x in inputs]
        with tf.variable_scope('RNN') as scope:
            self.initial_state = tf.zeros(
                [self.config.batch_size, self.config.hidden_size])
            state = self.initial_state
            rnn_outputs = []
            for tstep, current_input in enumerate(inputs):
                if tstep > 0:
                    scope.reuse_variables()
                RNN_H = tf.get_variable(
                    'HMatrix', [self.config.hidden_size, self.config.hidden_size])
                RNN_I = tf.get_variable(
                    'IMatrix', [self.config.embed_size, self.config.hidden_size])
                RNN_b = tf.get_variable('B', [self.config.hidden_size])
                state = tf.nn.sigmoid(
                    tf.matmul(state, RNN_H) + tf.matmul(current_input, RNN_I) + RNN_b)
                rnn_outputs.append(state)
            self.final_state = rnn_outputs[-1]
        with tf.variable_scope('RNNDropout'):
            rnn_outputs = [tf.nn.dropout(x, self.dropout_placeholder)
                           for x in rnn_outputs]
        return rnn_outputs

    def run_epoch(self, session, data, train_op=None, verbose=10):
        config = self.config
        dp = config.dropout
        if not train_op:
            train_op = tf.no_op()
            dp = 1
        total_steps = sum(
            1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
        total_loss = []
        state = self.initial_state.eval()
        for step, (x, y) in enumerate(
                ptb_iterator(data, config.batch_size, config.num_steps)):
            feed = {self.input_placeholder: x,
                    self.labels_placeholder: y,
                    self.initial_state: state,
                    self.dropout_placeholder: dp}
            loss, state, _ = session.run(
                [self.calculate_loss, self.final_state, train_op], feed_dict=feed)
            total_loss.append(loss)
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : pp = {}'.format(
                    step, total_steps, np.exp(np.mean(total_loss))))
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
        return np.exp(np.mean(total_loss))
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder( tf.int32, shape=[None, self.config.num_steps]) self.labels_placeholder = tf.placeholder( tf.int32, shape=[None, self.config.num_steps]) self.dropout_placeholder = tf.placeholder(tf.float32, name="dropout_keep_prob") ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE with tf.variable_scope("embedding_layer") as scope: embedding = tf.get_variable( "embedding", [len(self.vocab), self.config.embed_size], initializer=tf.random_uniform_initializer(-1, 1), trainable=True) variable_summaries(embedding, embedding.name) #tf.random_uniform_initializer(minval=0.0, maxval=1.0, seed=None, dtype=tf.float32) #tf.random_normal_initializer(mean=0, stddev=1., seed=None, dtype=tf.float32) #so each row corresponds to an word to embedding representation inputs = tf.nn.embedding_lookup(params=embedding, ids=self.input_placeholder) #this should use the id from input parameters to look up the embedding representation #shape of inputs is now (?, self.config.num_steps, self.config.embed_size) # (?, 10, 50) -> current case inputs = tf.split(1, self.config.num_steps, inputs) for i in range(len(inputs)): inputs[i] = tf.squeeze(inputs[i], [1]) #this removes the extra dimensions of size=1, at dim=1 ##print(len(inputs), inputs[0].get_shape()) # current_case length = 10, (?, 50) ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. 
The projection layer transforms the hidden representation to a distribution over the vocabulary. Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE with tf.variable_scope('projection'): U = tf.get_variable("U", [self.config.hidden_size, len(self.vocab)], initializer=tf.random_uniform_initializer()) b_2 = tf.get_variable("b_U", [len(self.vocab)], initializer=tf.constant_initializer(0.)) variable_summaries(U, U.name) variable_summaries(b_2, b_2.name) outputs = [] for rnn_step in rnn_outputs: out = tf.matmul(rnn_step, U) + b_2 outputs.append(out) ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])] cross_entropy = sequence_loss( [output], [tf.reshape(self.labels_placeholder, [-1])], all_ones, len(self.vocab)) tf.add_to_collection('total_loss', cross_entropy) loss = tf.add_n(tf.get_collection('total_loss')) tf.scalar_summary('loss', loss) ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE optimizer = tf.train.AdamOptimizer(self.config.lr) train_op = optimizer.minimize(loss) ### END YOUR CODE return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [ tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs ] # Reshape the output into len(vocab) sized chunks - the -1 says as many as # needed to evenly divide output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss) self.merged_summaries = tf.merge_all_summaries() self.summary_writer = None def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. 
Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE with tf.variable_scope('InputDropout'): inputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in inputs ] with tf.variable_scope('rnn') as scope: self.initial_state = tf.zeros( [self.config.batch_size, self.config.hidden_size]) state = self.initial_state rnn_outputs = [] for step, current_input in enumerate(inputs): if step > 0: scope.reuse_variables() Whh = tf.get_variable( 'Whh', [self.config.hidden_size, self.config.hidden_size], initializer=tf.random_normal_initializer(0, 1.)) Whx = tf.get_variable( 'Whx', [self.config.embed_size, self.config.hidden_size], initializer=tf.random_normal_initializer(0, 1.)) b_1 = tf.get_variable('b_h', [self.config.hidden_size], initializer=tf.constant_initializer(0.)) state = tf.nn.sigmoid( tf.matmul(state, Whh) + tf.matmul(current_input, Whx) + b_1) rnn_outputs.append(state) if step == 0: variable_summaries(Whh, Whh.name) variable_summaries(Whx, Whx.name) variable_summaries(b_1, b_1.name) self.final_state = rnn_outputs[-1] with tf.variable_scope('RNNDropout'): rnn_outputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in rnn_outputs ] ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, train_op=None, verbose=10, epoch=0): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum( 1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = { self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp } loss, state, _, merged = session.run([ self.calculate_loss, self.final_state, train_op, self.merged_summaries ], feed_dict=feed) total_loss.append(loss) if step % 50 == 0 and dp != 1: self.summary_writer.add_summary(merged, epoch * total_steps + step) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.exp(np.mean(total_loss))
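# Hedged note: run_epoch reports perplexity as the exponential of the mean
# per-batch cross-entropy, i.e. np.exp(np.mean(total_loss)). A tiny sketch
# with made-up loss values:
import numpy as np

batch_losses = [5.2, 4.9, 4.7, 4.6]          # hypothetical sequence_loss values
perplexity = np.exp(np.mean(batch_losses))   # ~ 128; a uniform model over V words would score V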
class RNN_Model(): def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def inference(self, tree, predict_only_root=False): """For a given tree build the RNN models computation graph up to where it may be used for inference. Args: tree: a Tree object on which to build the computation graph for the RNN Returns: softmax_linear: Output tensor with the computed logits. """ node_tensors = self.add_model(tree.root) if predict_only_root: node_tensors = node_tensors[tree.root] else: node_tensors = [tensor for node, tensor in node_tensors.items() if node.label!=2] node_tensors = tf.concat(node_tensors, 0) return self.add_projections(node_tensors) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. Hint: Look up tf.get_variable ''' with tf.variable_scope('Composition'): ### YOUR CODE HERE embedding = tf.get_variable("embedding", (len(self.vocab), self.config.embed_size)) W1 = tf.get_variable("W1", (2*self.config.embed_size, self.config.embed_size)) b1 = tf.get_variable("b1", (1, self.config.embed_size)) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE U = tf.get_variable("U", (self.config.embed_size, self.config.label_size)) bs = tf.get_variable("bs", (1, self.config.label_size)) ### END YOUR CODE def add_model(self, node): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by its parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ # sarim: # 1. You can think of add_model_vars as declaring the variables (i.e.: adding those variables to # the computation graph). What we are doing inside the "Composition" variable scope in # add_model is that we want to retrieve those same variables previously declared under # the "Composition" variable scope. Since we want to retrieve them and use them for forward # propagation, we have to indicate we want to reuse them. # 2. In short, in add_model's "Composition" scope you're retrieving those W1 and b1 values. with tf.variable_scope('Composition', reuse=True): ### YOUR CODE HERE embedding = tf.get_variable("embedding") W1 = tf.get_variable("W1") b1 = tf.get_variable("b1") ### END YOUR CODE # sarim: # Perhaps the simplest case of the gather use case comes up here: # http://www.michaelburge.us/2017/07/18/how-to-use-argmax-in-tensorflow.html # In add_model, you will be sending the function an embedding and a word ID. 
node_tensors = dict() curr_node_tensor = None if node.isLeaf: ### YOUR CODE HERE word_id = self.vocab.encode(node.word) curr_node_tensor = tf.expand_dims(tf.gather(embedding, word_id), 0) ### END YOUR CODE else: node_tensors.update(self.add_model(node.left)) node_tensors.update(self.add_model(node.right)) node_input = tf.concat([node_tensors[node.left], node_tensors[node.right]], 1) ### YOUR CODE HERE curr_node_tensor = tf.nn.relu(tf.matmul(node_input, W1) + b1) ### END YOUR CODE node_tensors[node] = curr_node_tensor return node_tensors def add_projections(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope("Projection", reuse=True): U = tf.get_variable("U") bs = tf.get_variable("bs") logits = tf.matmul(node_tensors, U) + bs ### END YOUR CODE return logits def loss(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D / sarim: a scalar """ loss = None # YOUR CODE HERE loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)) # sarim: l2 loss regularization with tf.variable_scope('Composition', reuse=True): W1 = tf.get_variable("W1") with tf.variable_scope('Projection', reuse=True): U = tf.get_variable("U") loss = loss + (self.config.l2 * (tf.nn.l2_loss(W1) + tf.nn.l2_loss(U))); # END YOUR CODE return loss def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. 
""" train_op = None # YOUR CODE HERE train_op = tf.train.GradientDescentOptimizer(self.config.lr).minimize(loss) # END YOUR CODE return train_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ predictions = None # YOUR CODE HERE predictions = tf.argmax(y, dimension=1) # END YOUR CODE return predictions def __init__(self, config): self.config = config self.load_data() def predict(self, trees, weights_path, get_loss = False): """Make predictions from the provided model.""" results = [] losses = [] for i in range(int(math.ceil(len(trees)/float(RESET_AFTER)))): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() saver = tf.train.Saver() saver.restore(sess, weights_path) for tree in trees[i*RESET_AFTER: (i+1)*RESET_AFTER]: logits = self.inference(tree, True) predictions = self.predictions(logits) root_prediction = sess.run(predictions)[0] if get_loss: root_label = tree.root.label loss = sess.run(self.loss(logits, [root_label])) losses.append(loss) results.append(root_prediction) return results, losses def run_epoch(self, new_model = False, verbose=True): step = 0 loss_history = [] while step < len(self.train_data): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() if new_model: init = tf.global_variables_initializer() sess.run(init) else: saver = tf.train.Saver() saver.restore(sess, './weights/%s.temp'%self.config.model_name) for _ in range(RESET_AFTER): if step>=len(self.train_data): break tree = self.train_data[step] logits = self.inference(tree) labels = [l for l in tree.labels if l!=2] loss = self.loss(logits, labels) train_op = self.training(loss) loss, _ = sess.run([loss, train_op]) loss_history.append(loss) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() step+=1 saver = tf.train.Saver() if not os.path.exists("./weights"): os.makedirs("./weights") saver.save(sess, './weights/%s.temp'%self.config.model_name) train_preds, _ = self.predict(self.train_data, './weights/%s.temp'%self.config.model_name) val_preds, val_losses = self.predict(self.dev_data, './weights/%s.temp'%self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print() print('Training acc (only root node): {}'.format(train_acc)) print('Validation acc (only root node): {}'.format(val_acc)) print('Confusion matrix:') print(self.make_conf(train_labels, train_preds)) print(self.make_conf(val_labels, val_preds)) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, verbose=True): complete_loss_history = [] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') best_val_loss = float('inf') best_val_epoch = 0 stopped = -1 for epoch in range(self.config.max_epochs): print('epoch %d'%epoch) if epoch==0: train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch() complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) #lr annealing epoch_loss = np.mean(loss_history) if epoch_loss>prev_epoch_loss*self.config.anneal_threshold: self.config.lr/=self.config.anneal_by print('annealed lr to %f'%self.config.lr) prev_epoch_loss = epoch_loss # save if model has 
improved on val if val_loss < best_val_loss: best_val_loss = val_loss best_val_epoch = epoch # if model has not improved for a while stop if epoch - best_val_epoch > self.config.early_stopping: stopped = epoch #break if verbose: sys.stdout.write('\r') sys.stdout.flush() print('\n\nstopped at %d\n'%stopped) return { 'loss_history': complete_loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def make_conf(self, labels, predictions): confmat = np.zeros([2, 2]) for l,p in zip(labels, predictions): confmat[l, p] += 1 return confmat
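# Hedged NumPy sketch of the Composition / Projection math used by the
# recursive model above. Parameter shapes follow add_model_vars; every value
# here is made up for illustration.
import numpy as np

embed_size, label_size = 4, 2
rng = np.random.RandomState(0)
W1 = rng.randn(2 * embed_size, embed_size)
b1 = rng.randn(1, embed_size)
U = rng.randn(embed_size, label_size)
bs = rng.randn(1, label_size)

h_left = rng.randn(1, embed_size)    # child vectors, e.g. rows gathered from the
h_right = rng.randn(1, embed_size)   # embedding matrix for leaf words

node_input = np.concatenate([h_left, h_right], axis=1)    # (1, 2*embed_size)
parent = np.maximum(np.dot(node_input, W1) + b1, 0.0)     # ReLU composition, (1, embed_size)
logits = np.dot(parent, U) + bs                           # raw sentiment scores, (1, label_size)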
class LSTMLM_Model(LanguageModel): def load_own_data(self, filename, filename2, filename3, debug=False, encoding='utf-8'): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_datafile(filename)) # self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array([ self.vocab.encode(word) for word in get_datafile(filename, encoding=encoding) ], dtype=np.int32) self.encoded_valid = np.array([ self.vocab.encode(word) for word in get_datafile(filename2, encoding=encoding) ], dtype=np.int32) self.encoded_test = np.array([ self.vocab.encode(word) for word in get_datafile(filename3, encoding=encoding) ], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): self.input_placeholder = tf.placeholder(tf.int32, [None, self.config.num_steps], name='Input') self.labels_placeholder = tf.placeholder(tf.int32, [None, self.config.num_steps], name='Target') self.dropout_placeholder = tf.placeholder(tf.float32, name='Dropout') self._new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate") def add_projection(self, rnn_outputs): with tf.variable_scope('Projection'): U = tf.get_variable('Matrix', [self.config.hidden_size, len(self.vocab)]) proj_b = tf.get_variable('Bias', [len(self.vocab)]) outputs = [tf.matmul(o, U) + proj_b for o in rnn_outputs] # END YOUR CODE return outputs def add_embedding(self): with tf.device('/cpu:0'): embedding = tf.get_variable( 'Embedding', [len(self.vocab), self.config.embed_size], trainable=True) inputs = tf.nn.embedding_lookup(embedding, self.input_placeholder) # inputs = [ # tf.squeeze(x, [1]) for x in tf.split(inputs, self.config.num_steps, 1)] return inputs def add_projection(self, lstm_output): with tf.variable_scope('Projection'): size = self.config.hidden_size vocab_size = self.vocab.__len__() softmax_w = tf.get_variable("softmax_w", [size, vocab_size], dtype=data_type()) softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type()) logits = tf.nn.xw_plus_b(lstm_output, softmax_w, softmax_b) # Reshape logits to be a 3-D tensor for sequence loss logits = tf.reshape( logits, [self.config.batch_size, self.config.num_steps, vocab_size]) return logits def add_loss_op(self, output): # Use the contrib sequence loss and average over the batches # all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])] # cross_entropy = sequence_loss( # output, [tf.reshape(self.labels_placeholder, [-1])], all_ones, len(self.vocab)) # [tf.reshape(self.labels_placeholder, [-1])], # cost = tf.reduce_sum(cross_entropy) loss_1 = tf.contrib.seq2seq.sequence_loss( output, self.labels_placeholder, tf.ones([self.config.batch_size, self.config.num_steps], dtype=data_type()), average_across_timesteps=False, average_across_batch=True) self.cost = tf.reduce_sum(loss_1) tf.add_to_collection('total_loss', self.cost) loss = tf.add_n(tf.get_collection('total_loss')) # END YOUR CODE return loss def assign_lr(self, session, lr_value): session.run(self._lr_update, feed_dict={self._new_lr: lr_value}) def add_training_op(self): self._lr = tf.Variable(0.0, trainable=False) tvars = tf.trainable_variables() grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars), self.config.max_grad_norm) optimizer = tf.train.GradientDescentOptimizer(self._lr) train_op = optimizer.apply_gradients( zip(grads, tvars), 
global_step=tf.train.get_or_create_global_step()) self._lr_update = tf.assign(self._lr, self._new_lr) # optimizer = tf.train.AdamOptimizer(self.config.lr) # train_op = optimizer.minimize(self.calculate_loss) return train_op def _get_lstm_cell(self, is_training): return tf.contrib.rnn.BasicLSTMCell(self.config.hidden_size, forget_bias=0.0, state_is_tuple=True, reuse=not is_training) def add_model(self, inputs, is_training): ''' Create the LSTM model ''' print(inputs.shape) with tf.variable_scope('InputDropout'): if is_training and self.config.dropout < 1: inputs = tf.nn.dropout(inputs, self.config.dropout) with tf.variable_scope('LSTMMODEL') as scope: def make_cell(): cell = self._get_lstm_cell(is_training) if is_training and self.config.dropout < 1: cell = tf.contrib.rnn.DropoutWrapper( cell, output_keep_prob=self.config.dropout) return cell cell = tf.contrib.rnn.MultiRNNCell( [make_cell() for _ in range(self.config.num_layers)], state_is_tuple=True) self.initial_state = cell.zero_state(self.config.batch_size, data_type()) state = self.initial_state # inputs = tf.unstack(inputs, num=self.config.num_steps, axis=1) # outputs, state = tf.nn.static_rnn( # cell, inputs, initial_state=self.initial_state) outputs = [] with tf.variable_scope("RNNV"): for time_step in range(self.config.num_steps): if time_step > 0: tf.get_variable_scope().reuse_variables() (cell_output, state) = cell(inputs[:, time_step, :], state) outputs.append(cell_output) output = tf.reshape(tf.concat(outputs, 1), [-1, self.config.hidden_size]) # return output, state # outputs, states = tf.nn.dynamic_rnn( # cell, inputs, dtype=tf.float32) self.final_state = state return output def run_epoch(self, session, data, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum( 1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) # total_loss = [] # state = self.initial_state.eval() costs = 0.0 iters = 0 for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history #self.initial_state: state, feed = { self.input_placeholder: x, self.labels_placeholder: y, self.dropout_placeholder: dp } loss, state, cost, _ = session.run( [self.calculate_loss, self.final_state, self.cost, train_op], feed_dict=feed) # total_loss.append(loss) costs += cost iters += self.config.num_steps if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(costs / iters))) # sys.stdout.write('\r{} / {} : pp = {}'.format( # step, total_steps, np.exp(np.mean(total_loss)))) sys.stdout.flush() if verbose: sys.stdout.write('\r') # return np.exp(np.mean(total_loss)) return np.exp(costs / iters) def assign_lr(self, session, lr_value): session.run(self._lr_update, feed_dict={self._new_lr: lr_value}) def __init__(self, config): self.config = config dirname = "./data/" self.load_own_data(filename=dirname + "train_data", filename2=dirname + "dev_data", filename3=dirname + "test_data", debug=False, encoding='Latin-1') self.add_placeholders() # self._lr = tf.Variable(0.0, trainable=False) # self._lr_update = tf.assign(self._lr, self._new_lr) self.inputs = self.add_embedding() self.lstm_outputs = self.add_model(self.inputs, self.config.is_training) self.outputs = self.add_projection(self.lstm_outputs) vocab_size = self.vocab.__len__() logits2 = tf.reshape( self.outputs, [self.config.batch_size * self.config.num_steps, 
vocab_size]) local_pred = tf.nn.softmax(tf.cast(logits2, tf.float64)) local_pred2 = tf.reshape( local_pred, [self.config.batch_size, self.config.num_steps, vocab_size]) self.predictions = tf.transpose(local_pred2, [1, 0, 2]) self.calculate_loss = self.add_loss_op(self.outputs) self.train_step = self.add_training_op()
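# Hedged driver sketch: one way assign_lr could be used to anneal the learning
# rate between epochs. The decay schedule and the Config fields used below
# (e.g. max_epochs) are illustrative assumptions, not something the class
# above is known to define.
import tensorflow as tf

config = Config()
model = LSTMLM_Model(config)
with tf.Session() as session:
    session.run(tf.global_variables_initializer())
    for epoch in range(config.max_epochs):
        decay = 0.5 ** max(epoch - 3, 0)        # halve the lr after a few epochs (assumed schedule)
        model.assign_lr(session, 1.0 * decay)   # feeds self._new_lr and runs self._lr_update
        train_pp = model.run_epoch(session, model.encoded_train,
                                   train_op=model.train_step)
        print('epoch {}: train perplexity {:.3f}'.format(epoch, train_pp))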
class RNN_Model(): def __init__(self, config): self.config = config self.load_data() def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data]#获取train中单词 self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def inference(self, tree, predict_only_root=False): """For a given tree build the RNN models computation graph up to where it may be used for inference. Args: tree: a Tree object on which to build the computation graph for the RNN Returns: softmax_linear: Output tensor with the computed logits. """ node_tensors = self.add_model(tree.root) if predict_only_root:#只对最后的输出节点做预测 node_tensors = node_tensors[tree.root] else: #只读取标签不为2的node,即不为neutral的node,此时对每个节点都做预测 #node代表合并后的词组,tensor代表对应的激活值 node_tensors = [tensor for node, tensor in node_tensors.iteritems() if node.label!=2] node_tensors = tf.concat(0, node_tensors) return self.add_projections(node_tensors) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' with tf.variable_scope('Composition'): ### YOUR CODE HERE tf.get_variable('embedding', [self.vocab.total_words, self.config.embed_size]) tf.get_variable('W1', [2 * self.config.embed_size, self.config.embed_size]) tf.get_variable('b1', [1, self.config.embed_size]) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE tf.get_variable('U', [self.config.embed_size, self.config.label_size]) tf.get_variable('bs', [1, self.config.label_size]) ### END YOUR CODE def add_model(self, node): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by it's parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ with tf.variable_scope('Composition', reuse=True): ### YOUR CODE HERE embedding = tf.get_variable('embedding') W1 = tf.get_variable('W1') b1 = tf.get_variable('b1') ### END YOUR CODE node_tensors = dict() curr_node_tensor = None #判断是否为根节点,如果是则拆分,不是则返回词向量 if node.isLeaf: ### YOUR CODE HERE word_id = self.vocab.encode(node.word) #Gather slices from params according to indices.Produces an output tensor with shape indices.shape + params.shape[1:] # Scalar indices # output[:, ..., :] = params[indices, :, ... 
:]
            # 't' is a tensor of shape [2]
            # shape(expand_dims(t, 0)) ==> [1, 2]
            # shape(expand_dims(t, 1)) ==> [2, 1]
            # shape(expand_dims(t, -1)) ==> [2, 1]
            # 't2' is a tensor of shape [2, 3, 5]
            # shape(expand_dims(t2, 0)) ==> [1, 2, 3, 5]
            # shape(expand_dims(t2, 2)) ==> [2, 3, 1, 5]
            # shape(expand_dims(t2, 3)) ==> [2, 3, 5, 1]
            # look up the word vector for the word whose index is word_id
            curr_node_tensor = tf.expand_dims(tf.gather(embedding, word_id), 0)
            ### END YOUR CODE
        else:
class RNNLM_Model(LanguageModel): def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train') ], # 将句子get成word,再encode成one-hot向量 dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE self.input_placeholder = tf.placeholder( tf.int32, shape=[None, self.config.num_steps], name='Input') self.labels_placeholder = tf.placeholder( tf.int32, shape=[None, self.config.num_steps], name='Target') self.dropout_placeholder = tf.placeholder(tf.float32, name='Dropout') ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE embedding = tf.get_variable( 'Embedding', [len(self.vocab), self.config.embed_size], trainable=True) # L: (len(self.vocab), embed_size inputs = tf.nn.embedding_lookup( embedding, self.input_placeholder ) # Looks up ids in a list of embedding tensors. #经过验证,可以发现inputs此时的shape变为(?,self.config.num_steps,self.embedded_size) #that is to say--->对应的shape转化为了bne,即(batch_size,num_steps,embedded_size) inputs = [ tf.squeeze(x, [1]) for x in tf.split(1, self.config.num_steps, inputs) ] # remove specific dimensions of size 1 at postion=[1] #经过这一步操作,推测可能将Tensor的shape转化为了[(?,self.embedded_size),...,(?,self.embedded_size)],list的size为self.num_steps, #即转化为了nbe,that is to say--->(num_steps,batch_size,embedded_size) ### END YOUR CODE return inputs def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. 
Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ ### YOUR CODE HERE #此时添加projection操作,即 #针对hidden_layer此时进行转化,turn the list[] which contains num_sizes Tensors which shape are (batch_size,hidden_size #into a list[] contains num_sizes Tensors which shape are (batch_size,embedded_size) with tf.variable_scope('Projection'): U = tf.get_variable('Matrix', [self.config.hidden_size, len(self.vocab)]) proj_b = tf.get_variable('Bias', [len(self.vocab)]) outputs = [tf.matmul(o, U) + proj_b for o in rnn_outputs] # outputs=rnn_outputs*U+b2 ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])] cross_entropy = sequence_loss( # cross entropy [output], [tf.reshape(self.labels_placeholder, [-1])], all_ones, len(self.vocab)) tf.add_to_collection('total_loss', cross_entropy) loss = tf.add_n(tf.get_collection('total_loss')) # 最终的loss ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE optimizer = tf.train.AdamOptimizer(self.config.lr) train_op = optimizer.minimize(self.calculate_loss) # 用Adam最小化loss ### END YOUR CODE return train_op def __init__(self, config): self.config = config self.load_data(debug=False) self.add_placeholders() #更多细节可以观看https://www.jianshu.com/p/b4c5ff7c450f,上面对为什么需要add_embedding层做了解释! #对rnn的输入进行转化,嵌入 self.inputs = self.add_embedding() #进行运算,得到隐藏状态,即hidden_state,注意这儿写的rnn_outputs实际上还不是最后的输出,[]里面包括的数据shape为bh-->(batch_size,hidden_size) #list的size为num_steps,actually we can view this a tensor which shape is (nbh)--->(num_steps,batch_size,hidden_size) self.rnn_outputs = self.add_model(self.inputs) # rnn网络 #这里进行运算,得到了最后的输出 self.outputs = self.add_projection( self.rnn_outputs) # 对rnn输出结果进行projection # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. 
sum(output of softmax) = 1.00000298179 and not 1)
        self.predictions = [
            tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs
        ]  # softmax over the projection outputs
        # Reshape the output into len(vocab) sized chunks - the -1 says as many as
        # needed to evenly divide
        # Note how tf.concat is used here: its first argument is the axis and the
        # second is a list of same-shaped tensors. Concatenating along axis 1 gives
        # shape (batch_size, num_steps * len(vocab)), and the reshape then yields
        # (batch_size * num_steps, len(vocab)).
        output = tf.reshape(tf.concat(1, self.outputs), [-1, len(self.vocab)])
        self.calculate_loss = self.add_loss_op(output)   # compute the loss on output
        self.train_step = self.add_training_op(self.calculate_loss)  # training op that minimizes the loss

    def add_model(self, inputs):
        """Creates the RNN LM model.

        In the space provided below, you need to implement the equations for the
        RNN LM model. Note that you may NOT use built in rnn_cell functions from
        tensorflow.

        Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial
              state for the RNN. Add this to self as instance variable
              self.initial_state  (Don't change variable name)
        Hint: Add the last RNN output to self as instance variable
              self.final_state  (Don't change variable name)
        Hint: Make sure to apply dropout to the inputs and the outputs.
        Hint: Use a variable scope (e.g. "RNN") to define RNN variables.
        Hint: Perform an explicit for-loop over inputs. You can use
              scope.reuse_variables() to ensure that the weights used at each
              iteration (each time-step) are the same. (Make sure you don't call
              this for iteration 0 though or nothing will be initialized!)
        Hint: Here are the dimensions of the various variables you will need to
              create:
              H: (hidden_size, hidden_size)
              I: (embed_size, hidden_size)
              b_1: (hidden_size,)
        Args:
          inputs: List of length num_steps, each of whose elements should be
                  a tensor of shape (batch_size, embed_size).
        Returns:
          outputs: List of length num_steps, each of whose elements should be
                   a tensor of shape (batch_size, hidden_size)
        """
        ### YOUR CODE HERE
        with tf.variable_scope('InputDropout'):
            inputs = [
                tf.nn.dropout(x, self.dropout_placeholder) for x in inputs
            ]  # dropout of inputs
        with tf.variable_scope('RNN') as scope:
            self.initial_state = tf.zeros(  # initial state of RNN
                [self.config.batch_size, self.config.hidden_size])
            state = self.initial_state
            rnn_outputs = []
            for tstep, current_input in enumerate(inputs):  # tstep indexes the time steps (words)
                if tstep > 0:
                    scope.reuse_variables()
                    # scope.reuse_variables() is what ties the current step to the
                    # previous ones: the same HMatrix/IMatrix/B variables are reused,
                    # and `state` in the update below is the hidden state left over
                    # from the previous time step
                RNN_H = tf.get_variable(
                    'HMatrix', [self.config.hidden_size, self.config.hidden_size])
                RNN_I = tf.get_variable(
                    'IMatrix', [self.config.embed_size, self.config.hidden_size])
                RNN_b = tf.get_variable('B', [self.config.hidden_size])
                # the resulting state has shape (batch_size, hidden_size)
                state = tf.nn.sigmoid(
                    tf.matmul(state, RNN_H) + tf.matmul(current_input, RNN_I) +
                    RNN_b)  # state is the hidden layer at the current time step
                rnn_outputs.append(state)  # reused in the next iteration, so this list
                                           # also records the hidden state of every step
            self.final_state = rnn_outputs[-1]
        with tf.variable_scope('RNNDropout'):
            rnn_outputs = [
                tf.nn.dropout(x, self.dropout_placeholder) for x in rnn_outputs
            ]  # dropout of outputs
        ### END YOUR CODE
        return rnn_outputs

    def run_epoch(self, session, data, train_op=None, verbose=10):
        config = self.config
        dp = config.dropout
        if not train_op:
            train_op = tf.no_op()
            dp = 1
        total_steps = sum(
            1 for x in ptb_iterator(data, config.batch_size, config.num_steps))
        total_loss = []
        state = self.initial_state.eval()
        for step, (x, y) in enumerate(
                ptb_iterator(data, config.batch_size, config.num_steps)):
            # We need to pass in the initial state and retrieve the final state to give
            # the RNN proper history
            feed = {
                self.input_placeholder: x,
                self.labels_placeholder: y,
                self.initial_state: state,
                self.dropout_placeholder: dp
            }
            loss, state, _ = session.run(
                [self.calculate_loss, self.final_state, train_op],
                feed_dict=feed)  # fetch the RNN's final state and the loss; train_op minimizes the loss
            total_loss.append(loss)
            if verbose and step % verbose == 0:
                sys.stdout.write('\r{} / {} : pp = {}'.format(
                    step, total_steps, np.exp(np.mean(total_loss))))  # log step count and running perplexity
                sys.stdout.flush()
        if verbose:
            sys.stdout.write('\r')
        return np.exp(np.mean(total_loss))  # perplexity: exponential of the mean loss
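# Hedged NumPy analogue of the tf.split / tf.squeeze step in add_embedding above:
# turn the (batch_size, num_steps, embed_size) lookup result into a list of
# num_steps arrays of shape (batch_size, embed_size), one per time step.
import numpy as np

batch_size, num_steps, embed_size = 2, 3, 4
looked_up = np.arange(batch_size * num_steps * embed_size, dtype=np.float32)
looked_up = looked_up.reshape(batch_size, num_steps, embed_size)
inputs = [looked_up[:, t, :] for t in range(num_steps)]   # length num_steps
assert inputs[0].shape == (batch_size, embed_size)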
class LSTM(object): def __init__(self, config=Config()): self.config = config self.load_data() self.add_placeholders() self.inputs = self.add_embedding() self.lstm_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.lstm_outputs) self.predictions = [ tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs ] output = tf.reshape(tf.concat(self.outputs, 1), [-1, len(self.vocab)]) self.calculate_loss = self.add_loss_operation(output) self.train_step = self.add_training_operation(self.calculate_loss) def load_data(self): self.vocab = Vocab() self.vocab.construct(get_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_dataset('valid')], dtype=np.int32) def add_placeholders(self): self.input_placeholder = tf.placeholder( tf.int32, shape=[None, self.config.num_steps], name='input') self.label_placeholder = tf.placeholder( tf.int32, shape=[None, self.config.num_steps], name='target') self.dropout_placeholder = tf.placeholder(tf.float32, name='drouput') def add_embedding(self): embedding = tf.get_variable('embedding', [len(self.vocab), self.config.embed_size], trainable=True) embedded_input = tf.nn.embedding_lookup(embedding, self.input_placeholder) embedded_input = [ tf.squeeze(x, [1]) for x in tf.split(embedded_input, self.config.num_steps, 1) ] return embedded_input def add_model(self, inputs): with tf.variable_scope('input_dropout'): inputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in inputs ] with tf.variable_scope('lstm_lm') as scope: self.initial_state = tf.zeros( [self.config.batch_size, self.config.hidden_size]) self.initial_memory = tf.zeros( [self.config.batch_size, self.config.hidden_size]) state = self.initial_state memory = self.initial_memory lstm_outputs = [] for tstep, current_input in enumerate(inputs): if tstep > 0: scope.reuse_variables() # Input Gate lstm_W_i = tf.get_variable( 'lstm_W_i', [self.config.embed_size, self.config.hidden_size]) lstm_U_i = tf.get_variable( 'lstm_U_i', [self.config.hidden_size, self.config.hidden_size]) lstm_b_i = tf.get_variable('lstm_b_i', [self.config.hidden_size]) lstm_i_output = tf.nn.sigmoid( tf.matmul(current_input, lstm_W_i) + tf.matmul(state, lstm_U_i) + lstm_b_i) # Forget Gate lstm_W_f = tf.get_variable( 'lstm_W_f', [self.config.embed_size, self.config.hidden_size]) lstm_U_f = tf.get_variable( 'lstm_U_f', [self.config.hidden_size, self.config.hidden_size]) lstm_b_f = tf.get_variable('lstm_b_f', [self.config.hidden_size]) lstm_f_output = tf.nn.sigmoid( tf.matmul(current_input, lstm_W_f) + tf.matmul(state, lstm_U_f) + lstm_b_f) # Output Gate lstm_W_o = tf.get_variable( 'lstm_W_o', [self.config.embed_size, self.config.hidden_size]) lstm_U_o = tf.get_variable( 'lstm_U_o', [self.config.hidden_size, self.config.hidden_size]) lstm_b_o = tf.get_variable('lstm_b_o', [self.config.hidden_size]) lstm_o_output = tf.nn.sigmoid( tf.matmul(current_input, lstm_W_o) + tf.matmul(state, lstm_U_o) + lstm_b_o) # New Memory Gate lstm_W_c = tf.get_variable( 'lstm_W_c', [self.config.embed_size, self.config.hidden_size]) lstm_U_c = tf.get_variable( 'lstm_U_c', [self.config.hidden_size, self.config.hidden_size]) lstm_b_c = tf.get_variable('lstm_b_c', [self.config.hidden_size]) lstm_c_output = tf.nn.tanh( tf.matmul(current_input, lstm_W_c) + tf.matmul(state, lstm_U_c) + lstm_b_c) # Final Memory Gate memory = tf.multiply(lstm_f_output, memory) + tf.multiply( lstm_i_output, lstm_c_output) # 
Final Hidden State state = tf.multiply(lstm_o_output, tf.nn.tanh(memory)) lstm_outputs.append(state) self.final_state = lstm_outputs[-1] with tf.variable_scope('lstm_dropout'): lstm_outputs = [ tf.nn.dropout(x, self.dropout_placeholder) for x in lstm_outputs ] return lstm_outputs def add_projection(self, lstm_outputs): with tf.variable_scope('projection'): proj_U = tf.get_variable( 'U_matrix', [self.config.hidden_size, len(self.vocab)]) proj_b = tf.get_variable('b_vector', [len(self.vocab)]) outputs = [tf.matmul(h, proj_U) + proj_b for h in lstm_outputs] return outputs def add_loss_operation(self, output): all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])] cross_entropy = sequence_loss( [output], [tf.reshape(self.label_placeholder, [-1])], all_ones, len(self.vocab)) tf.add_to_collection('total_loss', cross_entropy) loss = tf.add_n(tf.get_collection('total_loss')) return loss def add_training_operation(self, loss): with tf.variable_scope(tf.get_variable_scope(), reuse=False): optimizer = tf.train.AdamOptimizer(self.config.lr) train_op = optimizer.minimize(loss) return train_op def run_epoch(self, sess, data, train_op=None, verbose=10): dropout = self.config.dropout if train_op == None: train_op = tf.no_op() dropout = 1 total_steps = sum(1 for x in data_iterator( data, self.config.batch_size, self.config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( data_iterator(data, self.config.batch_size, self.config.num_steps)): feed = { self.input_placeholder: x, self.label_placeholder: y, self.dropout_placeholder: dropout, self.initial_state: state } loss, state, _ = sess.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: sys.stdout.write('\r{} / {} : Avg. Loss = {}'.format( step, total_steps, np.mean(total_loss))) sys.stdout.flush() if verbose: sys.stdout.write('\r') return np.mean(total_loss)
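# Hedged reference sketch (not part of the class above): one LSTM step with the
# same input / forget / output / candidate-memory gates that add_model builds,
# written in NumPy. `params` is a hypothetical dict of the W_*, U_*, b_* arrays.
import numpy as np

def lstm_step_sketch(x_t, state, memory, params):
    """x_t: (batch, embed_size); state, memory: (batch, hidden_size)."""
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    i = sigmoid(np.dot(x_t, params['W_i']) + np.dot(state, params['U_i']) + params['b_i'])  # input gate
    f = sigmoid(np.dot(x_t, params['W_f']) + np.dot(state, params['U_f']) + params['b_f'])  # forget gate
    o = sigmoid(np.dot(x_t, params['W_o']) + np.dot(state, params['U_o']) + params['b_o'])  # output gate
    c_tilde = np.tanh(np.dot(x_t, params['W_c']) + np.dot(state, params['U_c']) + params['b_c'])  # candidate memory
    memory = f * memory + i * c_tilde   # new cell memory
    state = o * np.tanh(memory)         # new hidden state
    return state, memory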
class RNNLM_Model(LanguageModel): def print_graph(self): for i in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='RNNLM'): print(i) def load_data(self, debug=False): """Loads starter word-vectors and train/dev/test data.""" print("loading data") self.vocab = Vocab() self.vocab.construct(get_ptb_dataset('train')) self.encoded_train = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('train')], dtype=np.int32) self.encoded_valid = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('valid')], dtype=np.int32) self.encoded_test = np.array( [self.vocab.encode(word) for word in get_ptb_dataset('test')], dtype=np.int32) if debug: num_debug = 1024 self.encoded_train = self.encoded_train[:num_debug] self.encoded_valid = self.encoded_valid[:num_debug] self.encoded_test = self.encoded_test[:num_debug] def add_placeholders(self): """Generate placeholder variables to represent the input tensors These placeholders are used as inputs by the rest of the model building code and will be fed data during training. Note that when "None" is in a placeholder's shape, it's flexible Adds following nodes to the computational graph. (When None is in a placeholder's shape, it's flexible) input_placeholder: Input placeholder tensor of shape (None, num_steps), type tf.int32 labels_placeholder: Labels placeholder tensor of shape (None, num_steps), type tf.float32 dropout_placeholder: Dropout value placeholder (scalar), type tf.float32 Add these placeholders to self as the instance variables self.input_placeholder self.labels_placeholder self.dropout_placeholder (Don't change the variable names) """ ### YOUR CODE HERE print("adding placeholders") self.input_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps)) # changing the dtype of labels to be tf.int32 because the sequence_loss # function requirese the labels to be of that type (true/false?) self.labels_placeholder = tf.placeholder(tf.int32, shape=(None, self.config.num_steps)) self.dropout_placeholder = tf.placeholder(tf.float32) ### END YOUR CODE def add_embedding(self): """Add embedding layer. Hint: This layer should use the input_placeholder to index into the embedding. Hint: You might find tf.nn.embedding_lookup useful. Hint: You might find tf.split, tf.squeeze useful in constructing tensor inputs Hint: Check the last slide from the TensorFlow lecture. Hint: Here are the dimensions of the variables you will need to create: L: (len(self.vocab), embed_size) Returns: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). """ print("embedding") with tf.variable_scope('embedding') as scope: # The embedding lookup is currently only implemented for the CPU with tf.device('/cpu:0'): ### YOUR CODE HERE embeddings = tf.get_variable("embeddings", \ shape=(len(self.vocab), \ self.config.embed_size)) embeddings = tf.random_uniform([len(self.vocab), \ self.config.embed_size], -1,1) inputs = tf.reshape(tf.nn.embedding_lookup(embeddings, \ self.input_placeholder), (self.config.num_steps, self.config.batch_size * self.config.embed_size)) scope.reuse_variables() ### END YOUR CODE return inputs def weight_init(self, name, shape): weight = self.xavier_initializer(shape, name) return weight def bias_init(self, name, shape): return tf.get_variable(name, shape, \ tf.float32) def add_projection(self, rnn_outputs): """Adds a projection layer. The projection layer transforms the hidden representation to a distribution over the vocabulary. 
Hint: Here are the dimensions of the variables you will need to create U: (hidden_size, len(vocab)) b_2: (len(vocab),) Args: rnn_outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each a tensor of shape (batch_size, len(vocab) """ print("projecting") ### YOUR CODE HERE with tf.variable_scope("projection") as scope: weights = self.weight_init("weights", \ (self.config.hidden_size, len(self.vocab))) biases = self.bias_init("biases", len(self.vocab)) outputs = [] for i in xrange(self.config.num_steps): outputs.append(tf.matmul(rnn_outputs[i], weights) + biases) scope.reuse_variables() ### END YOUR CODE return outputs def add_loss_op(self, output): """Adds loss ops to the computational graph. Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. Args: output: A tensor of shape (None, self.vocab) Actually according to the docs this should be of the shape below ([batch_size x sequence_length x logits] tensor) https://www.tensorflow.org/api_docs/python/tf/contrib/seq2seq/sequence_loss But based on our result of the projection operation we should also try sequence_length x batch_size x logits Returns: loss: A 0-d tensor (scalar) """ ### YOUR CODE HERE print("calculating loss") with tf.variable_scope("loss_op") as scope: labels = tf.reshape(self.labels_placeholder, [self.config.num_steps, self.config.batch_size]) weights = tf.ones(shape=tf.shape(labels), dtype=tf.float32, name="weights") loss = sequence_loss(logits=output, targets=labels, weights=weights, name="sequence_loss") tf.summary.scalar("loss", loss) scope.reuse_variables() ### END YOUR CODE return loss def add_training_op(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.AdamOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: Loss tensor, from cross_entropy_loss. Returns: train_op: The Op for training. """ ### YOUR CODE HERE print("training") with tf.variable_scope("training") as scope: train_op = tf.train.AdamOptimizer(self.config.lr).minimize(loss) scope.reuse_variables() ### END YOUR CODE return train_op def __init__(self, config): print("initializing") self.config = config self.xavier_initializer = xavier_weight_init() # Set debug=True to only grab 1024 words for train, validation and test self.load_data(debug=False) self.add_placeholders() self.inputs = self.add_embedding() self.rnn_outputs = self.add_model(self.inputs) self.outputs = self.add_projection(self.rnn_outputs) # We want to check how well we correctly predict the next word # We cast o to float64 as there are numerical issues at hand # (i.e. sum(output of softmax) = 1.00000298179 and not 1) self.predictions = [tf.nn.softmax(tf.cast(o, 'float64')) for o in self.outputs] output = tf.reshape(self.outputs, [self.config.num_steps, self.config.batch_size, len(self.vocab)]) self.calculate_loss = self.add_loss_op(output) self.train_step = self.add_training_op(self.calculate_loss) def add_model(self, inputs): """Creates the RNN LM model. In the space provided below, you need to implement the equations for the RNN LM model. Note that you may NOT use built in rnn_cell functions from tensorflow. 
Hint: Use a zeros tensor of shape (batch_size, hidden_size) as initial state for the RNN. Add this to self as instance variable self.initial_state (Don't change variable name) Hint: Add the last RNN output to self as instance variable self.final_state (Don't change variable name) Hint: Make sure to apply dropout to the inputs and the outputs. Hint: Use a variable scope (e.g. "RNN") to define RNN variables. Hint: Perform an explicit for-loop over inputs. You can use scope.reuse_variables() to ensure that the weights used at each iteration (each time-step) are the same. (Make sure you don't call this for iteration 0 though or nothing will be initialized!) Hint: Here are the dimensions of the various variables you will need to create: H: (hidden_size, hidden_size) I: (embed_size, hidden_size) b_1: (hidden_size,) Args: inputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, embed_size). Returns: outputs: List of length num_steps, each of whose elements should be a tensor of shape (batch_size, hidden_size) """ ### YOUR CODE HERE print("modeling") with tf.variable_scope("model") as scope: hidden_weights = self.weight_init("hidden_weights", (self.config.hidden_size, self.config.hidden_size)) weights = self.weight_init("weights", (self.config.embed_size, self.config.hidden_size)) biases = self.bias_init("biases", (self.config.hidden_size)) self.initial_state = tf.zeros((self.config.batch_size, self.config.hidden_size)) h_t = self.initial_state rnn_outputs = [] inputs = tf.nn.dropout(inputs, self.config.dropout) for i in xrange(self.config.num_steps): shaped_input = tf.reshape(inputs[i], \ (self.config.batch_size, self.config.embed_size)) h_t = tf.sigmoid(tf.matmul(h_t, hidden_weights) \ + tf.matmul(shaped_input, weights) + biases) tf.summary.histogram('h_t', h_t) tf.Print(i, [shaped_input, h_t] , message="Step") rnn_outputs.append(h_t) scope.reuse_variables() self.final_state = h_t rnn_outputs = tf.nn.dropout(rnn_outputs, self.config.dropout) ### END YOUR CODE return rnn_outputs def run_epoch(self, session, data, epoch=0, train_op=None, verbose=10): config = self.config dp = config.dropout if not train_op: train_op = tf.no_op() dp = 1 total_steps = sum(1 for x in ptb_iterator(data, config.batch_size, config.num_steps)) total_loss = [] state = self.initial_state.eval() for step, (x, y) in enumerate( ptb_iterator(data, config.batch_size, config.num_steps)): # We need to pass in the initial state and retrieve the final state to give # the RNN proper history feed = {self.input_placeholder: x, self.labels_placeholder: y, self.initial_state: state, self.dropout_placeholder: dp} loss, state, summary = session.run( [self.calculate_loss, self.final_state, train_op], feed_dict=feed) total_loss.append(loss) if verbose and step % verbose == 0: ## Logging summaries = tf.summary.merge_all() train_writer = tf.summary.FileWriter("./logs", session.graph) print("Loss is {}".format(loss)) print('\r{} / {} : pp = {}'.format( step, total_steps, np.exp(np.mean(total_loss)))) #sys.stdout.flush() #if verbose: #sys.stdout.write('\r') return np.exp(np.mean(total_loss))
class RNN_Model(): def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def inference(self, tree, predict_only_root=False): """For a given tree build the RNN models computation graph up to where it may be used for inference. Args: tree: a Tree object on which to build the computation graph for the RNN Returns: softmax_linear: Output tensor with the computed logits. """ node_tensors = self.add_model(tree.root) if predict_only_root: node_tensors = node_tensors[tree.root] else: node_tensors = [tensor for node, tensor in node_tensors.iteritems() if node.label!=2] node_tensors = tf.concat(0, node_tensors) return self.add_projections(node_tensors) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' with tf.variable_scope('Composition'): ### YOUR CODE HERE tf.get_variable('embedding', shape=[self.vocab.total_words, self.config.embed_size]) tf.get_variable('W1', shape=[2*self.config.embed_size, self.config.embed_size]) tf.get_variable('b1',shape=[1,self.config.embed_size]) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE tf.get_variable('U',shape=[self.config.embed_size,self.config.label_size]) tf.get_variable('bs',shape=[1, self.config.label_size]) ### END YOUR CODE def add_model(self, node): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by it's parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). 
Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ with tf.variable_scope('Composition', reuse=True): ### YOUR CODE HERE embedding = tf.get_variable('embedding') W1=tf.get_variable('W1') b1=tf.get_variable('b1') ### END YOUR CODE node_tensors = dict() curr_node_tensor = None if node.isLeaf: ### YOUR CODE HERE idx = self.vocab.encode(node.word) h = tf.gather(embedding, indices=idx) curr_node_tensor = tf.expand_dims(h, 0) ### END YOUR CODE else: node_tensors.update(self.add_model(node.left)) node_tensors.update(self.add_model(node.right)) ### YOUR CODE HERE HlHr=tf.concat(1, [node_tensors[node.left], node_tensors[node.right]]) curr_node_tensor = tf.nn.relu(tf.matmul(HlHr, W1) + b1) ### END YOUR CODE node_tensors[node] = curr_node_tensor return node_tensors def add_projections(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope('Projection', reuse=True): U = tf.get_variable('U') bs = tf.get_variable('bs') logits = tf.matmul(node_tensors, U) + bs ### END YOUR CODE return logits def loss(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ loss = None # YOUR CODE HERE with tf.variable_scope('Composition', reuse=True): W1 = tf.get_variable('W1') with tf.variable_scope('Projection', reuse=True): U = tf.get_variable('U') l2loss = tf.nn.l2_loss(W1) + tf.nn.l2_loss(U) cross_entropy = tf.reduce_sum( tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)) loss = cross_entropy + self.config.l2 * l2loss # sparse_softmax = tf.nn.sparse_softmax_cross_entropy_with_logits(logits,labels) # tf.add_to_collection('total_loss', tf.reduce_sum(sparse_softmax)) # for variable in [W1, U]: # tf.add_to_collection('total_loss', self.config.l2 * tf.nn.l2_loss(variable)) # loss = tf.add_n(tf.get_collection('total_loss')) # END YOUR CODE return loss def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. 
""" train_op = None # YOUR CODE HERE optimizer = tf.train.GradientDescentOptimizer(self.config.lr) train_op = optimizer.minimize(loss) # END YOUR CODE return train_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ predictions = None # YOUR CODE HERE predictions = tf.argmax(y, 1) # END YOUR CODE return predictions def __init__(self, config): self.config = config self.load_data() def predict(self, trees, weights_path, get_loss = False): """Make predictions from the provided model.""" results = [] losses = [] for i in xrange(int(math.ceil(len(trees)/float(RESET_AFTER)))): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() saver = tf.train.Saver() saver.restore(sess, weights_path) for tree in trees[i*RESET_AFTER: (i+1)*RESET_AFTER]: logits = self.inference(tree, True) predictions = self.predictions(logits) root_prediction = sess.run(predictions)[0] if get_loss: root_label = tree.root.label loss = sess.run(self.loss(logits, [root_label])) losses.append(loss) results.append(root_prediction) return results, losses def run_epoch(self, new_model = False, verbose=True): step = 0 loss_history = [] while step < len(self.train_data): with tf.Graph().as_default(), tf.Session() as sess: self.add_model_vars() if new_model: init = tf.initialize_all_variables() sess.run(init) else: saver = tf.train.Saver() saver.restore(sess, './weights/%s.temp'%self.config.model_name) for _ in xrange(RESET_AFTER): if step>=len(self.train_data): break tree = self.train_data[step] logits = self.inference(tree) labels = [l for l in tree.labels if l!=2] loss = self.loss(logits, labels) train_op = self.training(loss) loss, _ = sess.run([loss, train_op]) loss_history.append(loss) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() step+=1 saver = tf.train.Saver() if not os.path.exists("./weights"): os.makedirs("./weights") saver.save(sess, './weights/%s.temp'%self.config.model_name) train_preds, _ = self.predict(self.train_data, './weights/%s.temp'%self.config.model_name) val_preds, val_losses = self.predict(self.dev_data, './weights/%s.temp'%self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print print 'Training acc (only root node): {}'.format(train_acc) print 'Valiation acc (only root node): {}'.format(val_acc) print self.make_conf(train_labels, train_preds) print self.make_conf(val_labels, val_preds) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, verbose=True): complete_loss_history = [] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') best_val_loss = float('inf') best_val_epoch = 0 stopped = -1 for epoch in xrange(self.config.max_epochs): print 'epoch %d'%epoch if epoch==0: train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch() complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) #lr annealing epoch_loss = np.mean(loss_history) if epoch_loss>prev_epoch_loss*self.config.anneal_threshold: self.config.lr/=self.config.anneal_by print 'annealed lr to %f'%self.config.lr prev_epoch_loss = epoch_loss #save if model has improved on val if val_loss < 
best_val_loss: shutil.copyfile('./weights/%s.temp'%self.config.model_name, './weights/%s'%self.config.model_name) best_val_loss = val_loss best_val_epoch = epoch # if model has not improved for a while, stop if epoch - best_val_epoch > self.config.early_stopping: stopped = epoch #break if verbose: sys.stdout.write('\r') sys.stdout.flush() print '\n\nstopped at %d\n'%stopped return { 'loss_history': complete_loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def make_conf(self, labels, predictions): confmat = np.zeros([2, 2]) for l,p in itertools.izip(labels, predictions): confmat[l, p] += 1 return confmat
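# The recursive model above builds a fresh graph for every tree: leaves are embedding
# rows, internal nodes apply relu([h_left, h_right] W1 + b1) in the "Composition"
# scope, and node vectors are pushed through the U/bs "Projection". A minimal NumPy
# sketch of that composition on a two-word tree (ToyNode, the tiny vocabulary and the
# random weights are hypothetical, for illustration only):
import numpy as np

class ToyNode(object):
    def __init__(self, word=None, left=None, right=None):
        self.word, self.left, self.right = word, left, right
        self.isLeaf = word is not None

embed_size, label_size = 4, 2
vocab = {"not": 0, "bad": 1}
rng = np.random.RandomState(0)
embedding = rng.randn(len(vocab), embed_size) * 0.1
W1 = rng.randn(2 * embed_size, embed_size) * 0.1
b1 = np.zeros((1, embed_size))
U = rng.randn(embed_size, label_size) * 0.1
bs = np.zeros((1, label_size))

def compose(node):
    # Mirrors add_model: embedding lookup at leaves, relu composition elsewhere.
    if node.isLeaf:
        return embedding[vocab[node.word]].reshape(1, -1)
    h_l, h_r = compose(node.left), compose(node.right)
    return np.maximum(0.0, np.hstack([h_l, h_r]).dot(W1) + b1)

root_vec = compose(ToyNode(left=ToyNode("not"), right=ToyNode("bad")))
logits = root_vec.dot(U) + bs        # the "Projection" step used by add_projections
print(logits.shape)                  # (1, label_size)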
class RNN_Model(): def __init__(self, config): self.config = config self.load_data() self.merged_summaries = None self.summary_writer = None self.is_a_leaf = tf.placeholder(tf.bool, [None], name="is_a_leaf") self.left_child = tf.placeholder(tf.int32, [None], name="lchild") self.right_child = tf.placeholder(tf.int32, [None], name="rchild") self.word_index = tf.placeholder(tf.int32, [None], name="word_index") self.labelholder = tf.placeholder(tf.int32, [None], name="labels_holder") self.add_model_vars() self.tensor_array = tf.TensorArray(tf.float32, size=0, dynamic_size=True, clear_after_read=False, infer_shape=False) #tensor array stores the vectors (embedded or composed) self.tensor_array_op = None self.prediction = None self.logits = None self.root_logits = None self.root_predict = None self.root_loss = None self.full_loss = None self.training_op = None #tensor_array_op is the operation on the TensorArray # private functions used to construct the graph. def _embed_word(self, word_index): with tf.variable_scope("Composition", reuse=True) as scope: print(scope.name) embedding = tf.get_variable("embedding") print(embedding.name) return tf.expand_dims(tf.gather(embedding, word_index), 0) # private functions used to construct the graph. def _combine_children(self, left_index, right_index): left_tensor = self.tensor_array.read(left_index) right_tensor = self.tensor_array.read(right_index) with tf.variable_scope('Composition', reuse=True): W1 = tf.get_variable('W1') b1 = tf.get_variable('b1') return tf.nn.relu(tf.matmul(tf.concat(1, [left_tensor, right_tensor]), W1) + b1) # i is the index (over data stored in the placeholders) # identical type[out] = type[in]; can be used in while_loop # so first iteration -> puts left most leaf on the tensorarray (and increments i) # next iteration -> puts next left most (leaf on stack) and increments i # .... 
# until all the leaves are on the stack in the correct order # starts combining the leaves after and adding to the stack def _loop_over_tree(self, tensor_array, i): is_leaf = tf.gather(self.is_a_leaf, i) word_idx = tf.gather(self.word_index, i) left_child = tf.gather(self.left_child, i) right_child = tf.gather(self.right_child, i) node_tensor = tf.cond(is_leaf, lambda : self._embed_word(word_idx), lambda : self._combine_children(left_child, right_child)) tensor_array = tensor_array.write(i, node_tensor) i = tf.add(i,1) return tensor_array, i def construct_tensor_array(self): loop_condition = lambda tensor_array, i: \ tf.less(i, tf.squeeze(tf.shape(self.is_a_leaf))) #iterate over all leaves + composition tensor_array_op = tf.while_loop(cond=loop_condition, body=self._loop_over_tree, loop_vars=[self.tensor_array, 0], parallel_iterations=1)[0] return tensor_array_op def inference_op(self, predict_only_root=False): if predict_only_root: return self.root_logits_op() return self.logits_op() def load_data(self): """Loads train/dev/test data and builds vocabulary.""" self.train_data, self.dev_data, self.test_data = tr.simplified_data(700, 100, 200) # build vocab from training data self.vocab = Vocab() train_sents = [t.get_words() for t in self.train_data] self.vocab.construct(list(itertools.chain.from_iterable(train_sents))) def add_model_vars(self): ''' You model contains the following parameters: embedding: tensor(vocab_size, embed_size) W1: tensor(2* embed_size, embed_size) b1: tensor(1, embed_size) U: tensor(embed_size, output_size) bs: tensor(1, output_size) Hint: Add the tensorflow variables to the graph here and *reuse* them while building the compution graphs for composition and projection for each tree Hint: Use a variable_scope "Composition" for the composition layer, and "Projection") for the linear transformations preceding the softmax. ''' with tf.variable_scope('Composition') as scope: ### YOUR CODE HERE #initializer=initializer=tf.random_normal_initializer(0,3) print(scope.name) embedding = tf.get_variable("embedding", [self.vocab.total_words, self.config.embed_size]) print(embedding.name) W1 = tf.get_variable("W1", [2 * self.config.embed_size, self.config.embed_size]) b1 = tf.get_variable("b1", [1, self.config.embed_size]) l2_loss = tf.nn.l2_loss(W1) tf.add_to_collection(name="l2_loss", value=l2_loss) variable_summaries(embedding, embedding.name) variable_summaries(W1, W1.name) variable_summaries(b1, b1.name) ### END YOUR CODE with tf.variable_scope('Projection'): ### YOUR CODE HERE U = tf.get_variable("U", [self.config.embed_size, self.config.label_size]) bs = tf.get_variable("bs", [1, self.config.label_size]) variable_summaries(U, U.name) variable_summaries(bs, bs.name) l2_loss = tf.nn.l2_loss(U) tf.add_to_collection(name="l2_loss", value=l2_loss) ### END YOUR CODE def add_model(self): """Recursively build the model to compute the phrase embeddings in the tree Hint: Refer to tree.py and vocab.py before you start. Refer to the model's vocab with self.vocab Hint: Reuse the "Composition" variable_scope here Hint: Store a node's vector representation in node.tensor so it can be used by it's parent Hint: If node is a leaf node, it's vector representation is just that of the word vector (see tf.gather()). 
Args: node: a Node object Returns: node_tensors: Dict: key = Node, value = tensor(1, embed_size) """ if self.tensor_array_op is None: self.tensor_array_op = self.construct_tensor_array() return self.tensor_array_op def add_projections_op(self, node_tensors): """Add projections to the composition vectors to compute the raw sentiment scores Hint: Reuse the "Projection" variable_scope here Args: node_tensors: tensor(?, embed_size) Returns: output: tensor(?, label_size) """ logits = None ### YOUR CODE HERE with tf.variable_scope("Projection", reuse=True): U = tf.get_variable("U") bs = tf.get_variable("bs") logits = tf.matmul(node_tensors, U) + bs ### END YOUR CODE return logits def logits_op(self): #this is an operation on the updated tensor_array if self.logits is None: self.logits = self.add_projections_op(self.tensor_array_op.concat()) return self.logits def root_logits_op(self): #construct once if self.root_logits is None: self.root_logits = self.add_projections_op(self.tensor_array_op.read(self.tensor_array_op.size() -1)) return self.root_logits def root_prediction_op(self): if self.root_predict is None: self.root_predict = tf.squeeze(tf.argmax(self.root_logits_op(), 1)) return self.root_predict def full_loss_op(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ if self.full_loss is None: loss = None # YOUR CODE HERE l2_loss = self.config.l2 * tf.add_n(tf.get_collection("l2_loss")) idx = tf.where(tf.less(self.labelholder,2)) logits = tf.gather(logits, idx) labels = tf.gather(labels, idx) objective_loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)) loss = objective_loss + l2_loss tf.summary.scalar("loss_l2", l2_loss) tf.summary.scalar("loss_objective", tf.reduce_sum(objective_loss)) tf.summary.scalar("loss_total", loss) self.full_loss = loss # END YOUR CODE return self.full_loss def loss_op(self, logits, labels): """Adds loss ops to the computational graph. Hint: Use sparse_softmax_cross_entropy_with_logits Hint: Remember to add l2_loss (see tf.nn.l2_loss) Args: logits: tensor(num_nodes, output_size) labels: python list, len = num_nodes Returns: loss: tensor 0-D """ if self.root_loss is None: #construct once guard loss = None # YOUR CODE HERE l2_loss = self.config.l2 * tf.add_n(tf.get_collection("l2_loss")) objective_loss = tf.reduce_sum(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels)) loss = objective_loss + l2_loss tf.summary.scalar("root_loss_l2", l2_loss) tf.summary.scalar("root_loss_objective", tf.reduce_sum(objective_loss)) tf.summary.scalar("root_loss_total", loss) self.root_loss = loss # END YOUR CODE return self.root_loss def training(self, loss): """Sets up the training Ops. Creates an optimizer and applies the gradients to all trainable variables. The Op returned by this function is what must be passed to the `sess.run()` call to cause the model to train. See https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer for more information. Hint: Use tf.train.GradientDescentOptimizer for this model. Calling optimizer.minimize() will return a train_op object. Args: loss: tensor 0-D Returns: train_op: tensorflow op for training. 
""" if self.training_op is None: # YOUR CODE HERE optimizer = tf.train.AdamOptimizer(self.config.lr)#tf.train.GradientDescentOptimizer(self.config.lr) #optimizer = tf.train.AdamOptimizer(self.config.lr) self.training_op = optimizer.minimize(loss) # END YOUR CODE return self.training_op def predictions(self, y): """Returns predictions from sparse scores Args: y: tensor(?, label_size) Returns: predictions: tensor(?,1) """ if self.prediction is None: # YOUR CODE HERE self.prediction = tf.argmax(y, dimension=1) # END YOUR CODE return self.prediction def build_feed_dict(self, in_node): nodes_list = [] tr.leftTraverse(in_node, lambda node, args: args.append(node), nodes_list) node_to_index = OrderedDict() for idx, i in enumerate(nodes_list): node_to_index[i] = idx feed_dict = { self.is_a_leaf : [ n.isLeaf for n in nodes_list ], self.left_child : [ node_to_index[n.left] if not n.isLeaf else -1 for n in nodes_list ], self.right_child : [ node_to_index[n.right] if not n.isLeaf else -1 for n in nodes_list ], self.word_index : [ self.vocab.encode(n.word) if n.word else -1 for n in nodes_list ], self.labelholder : [ n.label for n in nodes_list ] } return feed_dict def predict(self, trees, weights_path, get_loss = False): """Make predictions from the provided model.""" results = [] losses = [] logits = self.root_logits_op() #evaluation is based upon the root node root_loss = self.loss_op(logits=logits, labels=self.labelholder[-1:]) root_prediction_op = self.root_prediction_op() with tf.Session() as sess: saver = tf.train.Saver() saver.restore(sess, weights_path) for t in trees: feed_dict = self.build_feed_dict(t.root) if get_loss: root_prediction, loss = sess.run([root_prediction_op, root_loss], feed_dict=feed_dict) losses.append(loss) results.append(root_prediction) else: root_prediction = sess.run(root_prediction_op, feed_dict=feed_dict) results.append(root_prediction) return results, losses #need to rework this: (OP creation needs to be made independent of using OPs) def run_epoch(self, new_model = False, verbose=True, epoch=0): loss_history = [] random.shuffle(self.train_data) with tf.Session() as sess: if new_model: add_model_op = self.add_model() logits = self.logits_op() loss = self.full_loss_op(logits=logits, labels=self.labelholder) train_op = self.training(loss) init = tf.global_variables_initializer() sess.run(init) else: saver = tf.train.Saver() saver.restore(sess, './weights/%s.temp'%self.config.model_name) logits = self.logits_op() loss = self.full_loss_op(logits=logits, labels=self.labelholder) train_op = self.training(loss) for step, tree in enumerate(self.train_data): feed_dict = self.build_feed_dict(tree.root) loss_value, _ = sess.run([loss, train_op], feed_dict=feed_dict) loss_history.append(loss_value) if verbose: sys.stdout.write('\r{} / {} : loss = {}'.format( step+1, len(self.train_data), np.mean(loss_history))) sys.stdout.flush() saver = tf.train.Saver() if not os.path.exists("./weights"): os.makedirs("./weights") #print('./weights/%s.temp'%self.config.model_name) saver.save(sess, './weights/%s.temp'%self.config.model_name) train_preds, _ = self.predict(self.train_data, './weights/%s.temp'%self.config.model_name) val_preds, val_losses = self.predict(self.dev_data, './weights/%s.temp'%self.config.model_name, get_loss=True) train_labels = [t.root.label for t in self.train_data] val_labels = [t.root.label for t in self.dev_data] train_acc = np.equal(train_preds, train_labels).mean() val_acc = np.equal(val_preds, val_labels).mean() print() print('Training acc (only root node): 
{}'.format(train_acc)) print('Validation acc (only root node): {}'.format(val_acc)) print(self.make_conf(train_labels, train_preds)) print(self.make_conf(val_labels, val_preds)) return train_acc, val_acc, loss_history, np.mean(val_losses) def train(self, verbose=True): complete_loss_history = [] train_acc_history = [] val_acc_history = [] prev_epoch_loss = float('inf') best_val_loss = float('inf') best_val_epoch = 0 stopped = -1 for epoch in range(self.config.max_epochs): print('epoch %d'%epoch) if epoch==0: train_acc, val_acc, loss_history, val_loss = self.run_epoch(new_model=True, epoch=epoch) else: train_acc, val_acc, loss_history, val_loss = self.run_epoch(epoch=epoch) complete_loss_history.extend(loss_history) train_acc_history.append(train_acc) val_acc_history.append(val_acc) #lr annealing epoch_loss = np.mean(loss_history) if epoch_loss>prev_epoch_loss*self.config.anneal_threshold: self.config.lr/=self.config.anneal_by print('annealed lr to %f'%self.config.lr) prev_epoch_loss = epoch_loss #save if model has improved on val if val_loss < best_val_loss: shutil.copyfile('./weights/%s.temp'%self.config.model_name, './weights/%s'%self.config.model_name) best_val_loss = val_loss best_val_epoch = epoch # if model has not improved for a while, stop if epoch - best_val_epoch > self.config.early_stopping: stopped = epoch #break if verbose: sys.stdout.write('\r') sys.stdout.flush() print('\n\nstopped at %d\n'%stopped) return { 'loss_history': complete_loss_history, 'train_acc_history': train_acc_history, 'val_acc_history': val_acc_history, } def make_conf(self, labels, predictions): confmat = np.zeros([2, 2]) for l,p in zip(labels, predictions): confmat[l, p] += 1 return confmat
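# Unlike the earlier per-tree-graph model, this version flattens each tree into
# parallel arrays via build_feed_dict (is_a_leaf, left_child, right_child, word_index)
# so that, as the comments above describe, children are placed on the TensorArray
# before their parents and the root ends up last; a single tf.while_loop then fills
# the array. A minimal NumPy sketch of that flattened evaluation order (the example
# arrays, vocabulary size and random weights are hypothetical, for illustration only):
import numpy as np

# Flattening of a two-leaf tree in child-before-parent order:
#   index 0: leaf word 0, index 1: leaf word 1, index 2: internal node over (0, 1)
is_a_leaf = [True, True, False]
word_index = [0, 1, -1]       # vocab ids for leaves, -1 elsewhere
left_child = [-1, -1, 0]      # child positions for internal nodes, -1 elsewhere
right_child = [-1, -1, 1]

embed_size = 4
rng = np.random.RandomState(0)
embedding = rng.randn(2, embed_size) * 0.1
W1 = rng.randn(2 * embed_size, embed_size) * 0.1
b1 = np.zeros((1, embed_size))

tensor_array = []             # plays the role of the TensorArray written in _loop_over_tree
for i in range(len(is_a_leaf)):
    if is_a_leaf[i]:
        node_vec = embedding[word_index[i]].reshape(1, -1)      # _embed_word
    else:
        pair = np.hstack([tensor_array[left_child[i]],
                          tensor_array[right_child[i]]])
        node_vec = np.maximum(0.0, pair.dot(W1) + b1)           # _combine_children
    tensor_array.append(node_vec)

root_vector = tensor_array[-1]   # root_logits_op projects the last entry of the array
print(root_vector.shape)         # (1, embed_size)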