def compute_cell_dynamics(args):
    with tf.Graph().as_default():
        # You can change this around, but make sure to reset it to 41 when
        # submitting.
        np.random.seed(41)
        tf.set_random_seed(41)

        with tf.variable_scope("dynamics"):
            x_placeholder = tf.placeholder(tf.float32, shape=(None, 1))
            h_placeholder = tf.placeholder(tf.float32, shape=(None, 1))

            def mat(x):
                return np.atleast_2d(np.array(x, dtype=np.float32))

            def vec(x):
                return np.atleast_1d(np.array(x, dtype=np.float32))

            with tf.variable_scope("cell"):
                # Draw random scalar parameters for the 1-D cell.
                Ur, Wr, Uz, Wz, Uo, Wo = [mat(3 * x) for x in np.random.randn(6)]
                br, bz, bo = [vec(x) for x in np.random.randn(3)]
                params = [Ur, Wr, br, Uz, Wz, bz, Uo, Wo, bo]

                tf.get_variable("W_r", initializer=Wr)
                tf.get_variable("U_r", initializer=Ur)
                tf.get_variable("b_r", initializer=br)
                tf.get_variable("W_z", initializer=Wz)
                tf.get_variable("U_z", initializer=Uz)
                tf.get_variable("b_z", initializer=bz)
                tf.get_variable("W_o", initializer=Wo)
                tf.get_variable("U_o", initializer=Uo)
                tf.get_variable("b_o", initializer=bo)

            tf.get_variable_scope().reuse_variables()
            y_gru, h_gru = GRUCell(1, 1)(x_placeholder, h_placeholder, scope="cell")
            # The second set of dynamics comes from the plain RNN cell so that
            # make_dynamics_plot can compare the two; using GRUCell twice here
            # would make both curves identical.
            y_rnn, h_rnn = RNNCell(1, 1)(x_placeholder, h_placeholder, scope="cell")

            init = tf.global_variables_initializer()
            with tf.Session() as session:
                session.run(init)

                # Sweep h_{t-1} over [-3, 3] with the input held at zero.
                x = mat(np.zeros(1000)).T
                h = mat(np.linspace(-3, 3, 1000)).T
                ht_gru = session.run([h_gru], feed_dict={x_placeholder: x, h_placeholder: h})
                ht_rnn = session.run([h_rnn], feed_dict={x_placeholder: x, h_placeholder: h})
                ht_gru = np.array(ht_gru)[0]
                ht_rnn = np.array(ht_rnn)[0]
                make_dynamics_plot(args, 0, h, ht_rnn, ht_gru, params)

                # Same sweep with the input held at one.
                x = mat(np.ones(1000)).T
                h = mat(np.linspace(-3, 3, 1000)).T
                ht_gru = session.run([h_gru], feed_dict={x_placeholder: x, h_placeholder: h})
                ht_rnn = session.run([h_rnn], feed_dict={x_placeholder: x, h_placeholder: h})
                ht_gru = np.array(ht_gru)[0]
                ht_rnn = np.array(ht_rnn)[0]
                make_dynamics_plot(args, 1, h, ht_rnn, ht_gru, params)
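# For reference, a NumPy sketch of the update the GRU cell above is assumed to
# compute, using one common convention and the parameter names seeded above
# (`gru_step` is illustrative, not part of the assignment; assumes the module's
# existing `import numpy as np`):
def gru_step(x, h, Ur, Wr, br, Uz, Wz, bz, Uo, Wo, bo):
    def sigmoid(z):
        return 1.0 / (1.0 + np.exp(-z))
    r = sigmoid(x.dot(Ur) + h.dot(Wr) + br)        # reset gate
    z = sigmoid(x.dot(Uz) + h.dot(Wz) + bz)        # update gate
    o = np.tanh(x.dot(Uo) + (r * h).dot(Wo) + bo)  # candidate state
    return z * h + (1 - z) * o                     # interpolate old and candidate state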
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    TODO: There are quite a few things you'll need to do in this function:
        - Define the variables U, b_2.
        - Define the vector h as a constant and initialize it with zeros.
          See tf.zeros and tf.shape for information on how to initialize this
          variable to be of the right shape.
          https://www.tensorflow.org/api_docs/python/constant_op/constant_value_tensors#zeros
          https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#shape
        - In a for loop, begin to unroll the RNN sequence. Collect the
          predictions in a list.
        - When unrolling the loop, from the second iteration onwards, you will
          HAVE to call tf.get_variable_scope().reuse_variables() so that you do
          not create new variables in the RNN cell.
          See https://www.tensorflow.org/versions/master/how_tos/variable_scope/
        - Concatenate and reshape the predictions into a predictions tensor.
    Hint: You will find the function tf.pack (similar to np.asarray) useful to
          assemble a list of tensors into a larger tensor.
          https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#pack
    Hint: You will find the function tf.transpose and the perm argument useful
          to shuffle the indices of the tensor.
          https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#transpose

    Remember:
        * Use the Xavier initialization for matrices.
        * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as
          an argument. The keep probability should be set to the value of
          self.dropout_placeholder.

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    x = self.add_embedding()
    dropout_rate = self.dropout_placeholder

    preds = []  # Predicted output at each timestep should go here!

    # Use the cell defined below. For Q2, we will just be using the
    # RNNCell you defined, but for Q3, we will run this code again
    # with a GRU cell!
    if self.config.cell == "rnn":
        cell = RNNCell(Config.n_features * Config.embed_size, Config.hidden_size)
    elif self.config.cell == "gru":
        cell = GRUCell(Config.n_features * Config.embed_size, Config.hidden_size)
    else:
        raise ValueError("Unsupported cell type: " + self.config.cell)

    # Define U and b2 as variables.
    # Initialize state as vector of zeros.
    ### YOUR CODE HERE (~4-6 lines)
    ### END YOUR CODE

    with tf.variable_scope("RNN"):
        for time_step in range(self.max_length):
            ### YOUR CODE HERE (~6-10 lines)
            pass
            ### END YOUR CODE

    # Make sure to reshape @preds here.
    ### YOUR CODE HERE (~2-4 lines)
    ### END YOUR CODE

    assert preds.get_shape().as_list() == [None, self.max_length, self.config.n_classes], \
        "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes], preds.get_shape().as_list())
    return preds
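# A minimal TF 1.x sketch of the variable-sharing pattern described in the TODO
# above (the scope and variable names are illustrative, not part of the
# assignment). Without reuse_variables(), the second tf.get_variable call with
# the same name would raise a "variable already exists" error.
def _variable_reuse_demo(max_length=3):
    with tf.variable_scope("demo_RNN"):
        for time_step in range(max_length):
            if time_step > 0:
                # From the second iteration onwards, reuse the variables
                # created on the first iteration.
                tf.get_variable_scope().reuse_variables()
            W = tf.get_variable("W", shape=(4, 4),
                                initializer=tf.contrib.layers.xavier_initializer())
    return W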
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    TODO: There are quite a few things you'll need to do in this function:
        - Define the variables U, b_2.
        - Define the vector h as a constant and initialize it with zeros.
          See tf.zeros and tf.shape for information on how to initialize this
          variable to be of the right shape.
          https://www.tensorflow.org/api_docs/python/tf/zeros
          https://www.tensorflow.org/api_docs/python/tf/shape
        - In a for loop, begin to unroll the RNN sequence. Collect the
          predictions in a list.
        - When unrolling the loop, from the second iteration onwards, you will
          HAVE to call tf.get_variable_scope().reuse_variables() so that you do
          not create new variables in the RNN cell.
          See https://www.tensorflow.org/api_guides/python/state_ops#Sharing_Variables
        - Concatenate and reshape the predictions into a predictions tensor.
    Hint: You will find the function tf.stack (similar to np.asarray) useful to
          assemble a list of tensors into a larger tensor.
          https://www.tensorflow.org/api_docs/python/tf/stack
    Hint: You will find the function tf.transpose and the perm argument useful
          to shuffle the indices of the tensor.
          https://www.tensorflow.org/api_docs/python/tf/transpose

    Remember:
        * Use the Xavier initialization for matrices.
        * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as
          an argument. The keep probability should be set to the value of
          self.dropout_placeholder.

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    x = self.add_embedding()
    dropout_rate = self.dropout_placeholder

    preds = []  # Predicted output at each timestep should go here!

    # Use the cell defined below. For Q2, we will just be using the
    # RNNCell you defined, but for Q3, we will run this code again
    # with a GRU cell!
    if self.config.cell == "rnn":
        cell = RNNCell(Config.n_features * Config.embed_size, Config.hidden_size)
    elif self.config.cell == "gru":
        cell = GRUCell(Config.n_features * Config.embed_size, Config.hidden_size)
    else:
        raise ValueError("Unsupported cell type: " + self.config.cell)

    # Define U and b2 as variables.
    # Initialize state as vector of zeros.
    ### YOUR CODE HERE (~4-6 lines)
    U = tf.get_variable(name='U',
                        shape=(Config.hidden_size, Config.n_classes),
                        dtype=tf.float32,
                        initializer=tf.contrib.layers.xavier_initializer())
    b2 = tf.get_variable(name='b2',
                         shape=(Config.n_classes,),
                         dtype=tf.float32,
                         initializer=tf.zeros_initializer())
    # The initial state's batch dimension must match input_placeholder's.
    h = tf.zeros([tf.shape(self.input_placeholder)[0], Config.hidden_size])
    ### END YOUR CODE

    with tf.variable_scope("RNN"):
        for time_step in range(self.max_length):
            ### YOUR CODE HERE (~6-10 lines)
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            o, h = cell(x[:, time_step, :], h)
            # The placeholder already holds the keep probability (it is fed 1
            # at test time), so pass it to tf.nn.dropout as-is, not 1 - p.
            o_drop = tf.nn.dropout(o, keep_prob=dropout_rate)
            y = tf.matmul(o_drop, U) + b2
            preds.append(y)
            ### END YOUR CODE

    # Make sure to reshape @preds here.
    ### YOUR CODE HERE (~2-4 lines)
    preds = tf.stack(preds, axis=1)
    ### END YOUR CODE

    assert preds.get_shape().as_list() == [None, self.max_length, self.config.n_classes], \
        "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes], preds.get_shape().as_list())
    return preds
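# For the reshape step above: stacking the per-timestep logits along axis=1 is
# equivalent to stacking along axis=0 and permuting with tf.transpose's `perm`
# argument, as hinted in the docstring. A small illustrative helper
# (`pred_list` is assumed to be a list of (batch_size, n_classes) tensors):
def _assemble_predictions(pred_list):
    a = tf.stack(pred_list, axis=1)
    b = tf.transpose(tf.stack(pred_list, axis=0), perm=[1, 0, 2])
    return a, b  # both have shape (batch_size, max_length, n_classes)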
class RNNModel(NERModel):
    """
    Implements a recurrent neural network with an embedding layer and
    single hidden layer.
    This network will predict a sequence of labels (e.g. PER) for a
    given token (e.g. Henry) using a featurized window around the token.
    """

    def add_placeholders(self):
        """Generates placeholder variables to represent the input tensors

        These placeholders are used as inputs by the rest of the model building
        and will be fed data during training. Note that when "None" is in a
        placeholder's shape, it's flexible (so we can use different batch sizes
        without rebuilding the model).

        Adds following nodes to the computational graph

        input_placeholder: Input placeholder tensor of shape
                           (None, self.max_length, n_features), type tf.int32
        labels_placeholder: Labels placeholder tensor of shape
                            (None, self.max_length), type tf.int32
        mask_placeholder: Mask placeholder tensor of shape
                          (None, self.max_length), type tf.bool
        dropout_placeholder: Dropout value placeholder (scalar), type tf.float32

        TODO: Add these placeholders to self as the instance variables
            self.input_placeholder
            self.labels_placeholder
            self.mask_placeholder
            self.dropout_placeholder

        HINTS:
            - Remember to use self.max_length NOT Config.max_length

        (Don't change the variable names)
        """
        ### YOUR CODE HERE (~4-6 lines)
        # This PyTorch port has no placeholders: the cell, the embedding layer,
        # and the output-layer parameters are created here instead, and the
        # input tensors are attached in train_on_batch/predict_on_batch.
        if self.config.cell == "rnn":
            self.cell = RNNCell(Config.n_features * Config.embed_size, Config.hidden_size)
        elif self.config.cell == "gru":
            self.cell = GRUCell(Config.n_features * Config.embed_size, Config.hidden_size)
        else:
            raise ValueError("Unsupported cell type: " + self.config.cell)
        self.embedded = nn.Embedding.from_pretrained(
            t.from_numpy(self.pretrained_embeddings), freeze=False)
        self.U = nn.init.xavier_uniform_(
            t.zeros((self.config.hidden_size, self.config.n_classes),
                    requires_grad=True, dtype=t.float32))
        self.b2 = t.zeros(self.config.n_classes, dtype=t.float32, requires_grad=True)
        ### END YOUR CODE

    def create_feed_dict(self, inputs_batch, mask_batch, labels_batch=None, dropout=1):
        """Creates the feed_dict for the model.

        A feed_dict takes the form of:
        feed_dict = {
            <placeholder>: <tensor of values to be passed for placeholder>,
            ....
        }

        Hint: The keys for the feed_dict should be a subset of the placeholder
              tensors created in add_placeholders.
        Hint: When an argument is None, don't add it to the feed_dict.

        Args:
            inputs_batch: A batch of input data.
            mask_batch: A batch of mask data.
            labels_batch: A batch of label data.
            dropout: The dropout rate.
        Returns:
            feed_dict: The feed dictionary mapping from placeholders to values.
        """
        ### YOUR CODE (~6-10 lines)
        # Feed dicts are a TensorFlow concept; this PyTorch port attaches
        # batches directly to the model, so a plain dict (keys illustrative)
        # is built here only to keep the interface intact.
        feed_dict = {"inputs": inputs_batch, "mask": mask_batch, "dropout": dropout}
        if labels_batch is not None:
            feed_dict["labels"] = labels_batch
        ### END YOUR CODE
        return feed_dict

    def add_embedding(self):
        """Adds an embedding layer that maps from input tokens (integers) to
        vectors and then concatenates those vectors:

        TODO:
            - Create an embedding tensor and initialize it with
              self.pretrained_embeddings.
            - Use the input_placeholder to index into the embeddings tensor,
              resulting in a tensor of shape
              (None, max_length, n_features, embed_size).
            - Concatenates the embeddings by reshaping the embeddings tensor
              to shape (None, max_length, n_features * embed_size).

        HINTS:
            - You might find tf.nn.embedding_lookup useful.
            - You can use tf.reshape to concatenate the vectors. See following
              link to understand what -1 in a shape means.
              https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#reshape.
        Returns:
            embeddings: tf.Tensor of shape (None, max_length, n_features*embed_size)
        """
        ### YOUR CODE HERE (~4-6 lines)
        embeddings = self.embedded(self.inputs).view(
            -1, self.max_length, self.config.n_features * self.config.embed_size)
        ### END YOUR CODE
        return embeddings

    def add_prediction_op(self, is_train=True):
        """Adds the unrolled RNN:
            h_0 = 0
            for t in 1 to T:
                o_t, h_t = cell(x_t, h_{t-1})
                o_drop_t = Dropout(o_t, dropout_rate)
                y_t = o_drop_t U + b_2

        TODO: There are quite a few things you'll need to do in this function:
            - Define the variables U, b_2.
            - Define the vector h as a constant and initialize it with zeros.
              See tf.zeros and tf.shape for information on how to initialize
              this variable to be of the right shape.
              https://www.tensorflow.org/api_docs/python/tf/zeros
              https://www.tensorflow.org/api_docs/python/tf/shape
            - In a for loop, begin to unroll the RNN sequence. Collect the
              predictions in a list.
            - When unrolling the loop, from the second iteration onwards, you
              will HAVE to call tf.get_variable_scope().reuse_variables() so
              that you do not create new variables in the RNN cell.
              See https://www.tensorflow.org/api_guides/python/state_ops#Sharing_Variables
            - Concatenate and reshape the predictions into a predictions tensor.
        Hint: You will find the function tf.stack (similar to np.asarray)
              useful to assemble a list of tensors into a larger tensor.
              https://www.tensorflow.org/api_docs/python/tf/stack
        Hint: You will find the function tf.transpose and the perm argument
              useful to shuffle the indices of the tensor.
              https://www.tensorflow.org/api_docs/python/tf/transpose

        Remember:
            * Use the Xavier initialization for matrices.
            * Note that tf.nn.dropout takes the keep probability (1 - p_drop)
              as an argument. The keep probability should be set to the value
              of self.dropout_placeholder.

        Returns:
            pred: tf.Tensor of shape (batch_size, max_length, n_classes)
        """
        x = self.add_embedding()
        dropout_rate = self.config.dropout

        preds = []  # Predicted output at each timestep should go here!

        # Use the cell defined below. For Q2, we will just be using the
        # RNNCell you defined, but for Q3, we will run this code again
        # with a GRU cell!

        # Define U and b2 as variables.
        # Initialize state as vector of zeros.
        ### YOUR CODE HERE (~4-6 lines)
        # x.size(0) is the batch size.
        h_t = t.zeros(x.size(0), self.config.hidden_size, requires_grad=False)
        ### END YOUR CODE

        for time_step in range(self.max_length):
            ### YOUR CODE HERE (~6-10 lines)
            o_t, h_t = self.cell(x[:, time_step, :], h_t)
            # F.dropout takes the drop probability p (unlike tf.nn.dropout's
            # keep_prob) and is disabled when training=False.
            o_drop_t = F.dropout(o_t, dropout_rate, training=is_train)
            y_t = t.mm(o_drop_t, self.U) + self.b2
            preds.append(y_t)
            ### END YOUR CODE

        # Make sure to reshape @preds here.
        ### YOUR CODE HERE (~2-4 lines)
        preds = t.stack(preds, dim=1)
        ### END YOUR CODE

        assert list(preds.shape[1:]) == [self.max_length, self.config.n_classes], \
            "predictions are not of the right shape. Expected {}, got {}".format(
                [self.max_length, self.config.n_classes], list(preds.shape[1:]))
        return preds

    def add_loss_op(self, preds):
        """Adds Ops for the loss function to the computational graph.

        TODO: Compute averaged cross entropy loss for the predictions.
        Importantly, you must ignore the loss for any masked tokens.

        Hint: You might find tf.boolean_mask useful to mask the losses on
              masked tokens.
        Hint: You can use tf.nn.sparse_softmax_cross_entropy_with_logits to
              simplify your implementation. You might find tf.reduce_mean
              useful.
        Args:
            pred: A tensor of shape (batch_size, max_length, n_classes)
                  containing the output of the neural network before the
                  softmax layer.
        Returns:
            loss: A 0-d tensor (scalar)
        """
        ### YOUR CODE HERE (~2-4 lines)
        criterion = nn.CrossEntropyLoss(reduction='none')
        # Masked sum of the per-token losses; dividing by self.mask.sum()
        # would give the per-token average the docstring describes.
        loss = t.mul(self.mask.flatten(),
                     criterion(preds.view(-1, self.config.n_classes),
                               self.labels.flatten())).sum()
        ### END YOUR CODE
        return loss

    def add_training_op(self):
        """Sets up the training Ops.

        Creates an optimizer and applies the gradients to all trainable
        variables. The Op returned by this function is what must be passed to
        the `sess.run()` call to cause the model to train. See
        https://www.tensorflow.org/versions/r0.7/api_docs/python/train.html#Optimizer
        for more information. Use tf.train.AdamOptimizer for this model.
        Calling optimizer.minimize() will return a train_op object.

        Args:
            loss: Loss tensor, from cross_entropy_loss.
        Returns:
            train_op: The Op for training.
        """
        ### YOUR CODE HERE (~1-2 lines)
        # In this PyTorch port the "train op" is the optimizer itself; it is
        # driven with zero_grad()/step() in train_on_batch.
        train_op = opt.Adam([{'params': self.cell.parameters()},
                             {'params': self.U},
                             {'params': self.b2},
                             {'params': self.embedded.weight}],
                            lr=self.config.lr)
        ### END YOUR CODE
        return train_op

    def preprocess_sequence_data(self, examples):
        def featurize_windows(data, start, end, window_size=1):
            """Uses the input sequences in @data to construct new windowed data points."""
            ret = []
            for sentence, labels in data:
                from util import window_iterator
                sentence_ = []
                for window in window_iterator(sentence, window_size, beg=start, end=end):
                    sentence_.append(sum(window, []))
                ret.append((sentence_, labels))
            return ret

        examples = featurize_windows(examples, self.helper.START, self.helper.END)
        return pad_sequences(examples, self.max_length)

    def consolidate_predictions(self, examples_raw, examples, preds):
        """Batch the predictions into groups of sentence length."""
        assert len(examples_raw) == len(examples)
        assert len(examples_raw) == len(preds)

        ret = []
        for i, (sentence, labels) in enumerate(examples_raw):
            _, _, mask = examples[i]
            labels_ = [l for l, m in zip(preds[i], mask) if m]  # only select elements of mask
            assert len(labels_) == len(labels)
            ret.append([sentence, labels, labels_])
        return ret

    def predict_on_batch(self, inputs_batch, mask_batch):
        self.inputs = t.from_numpy(inputs_batch)
        self.mask = t.from_numpy(mask_batch.astype(np.float32))
        with t.no_grad():
            predictions = self.add_prediction_op(is_train=False)
        # Predictions are 3-D and the last axis holds the classes, so take the
        # argmax over axis=2.
        return predictions.numpy().argmax(axis=2)

    def train_on_batch(self, inputs_batch, labels_batch, mask_batch):
        self.inputs = t.from_numpy(inputs_batch)
        self.labels = t.from_numpy(labels_batch)
        # Convert the True/False mask to a 0/1 float tensor.
        self.mask = t.from_numpy(mask_batch.astype(np.float32))
        self.train_op.zero_grad()
        loss = self.add_loss_op(self.add_prediction_op())
        loss.backward()
        self.train_op.step()
        return loss.item()

    def __init__(self, helper, config, pretrained_embeddings, report=None):
        super(RNNModel, self).__init__(helper, config, report)
        self.max_length = min(Config.max_length, helper.max_length)
        Config.max_length = self.max_length  # Just in case people make a mistake.
        self.pretrained_embeddings = pretrained_embeddings

        # Defining the input tensors (placeholders in the original TF version).
        self.inputs = None
        self.labels = None
        self.mask = None

        self.build()
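# For reference, a minimal sketch of the cell interface the port above relies
# on: cell(x_t, h_prev) -> (o_t, h_t). This is an assumption about the shape of
# RNNCell, not the assignment's actual implementation; for the basic RNN cell
# the output o_t and the new state h_t coincide. Assumes the module's existing
# `import torch as t` and `import torch.nn as nn`.
class _SketchRNNCell(nn.Module):
    def __init__(self, input_size, state_size):
        super(_SketchRNNCell, self).__init__()
        self.W_x = nn.Parameter(nn.init.xavier_uniform_(t.empty(input_size, state_size)))
        self.W_h = nn.Parameter(nn.init.xavier_uniform_(t.empty(state_size, state_size)))
        self.b = nn.Parameter(t.zeros(state_size))

    def forward(self, x, h_prev):
        # h_t = sigmoid(x W_x + h_{t-1} W_h + b)
        h = t.sigmoid(x.mm(self.W_x) + h_prev.mm(self.W_h) + self.b)
        return h, h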