def __call__(self, inputs, state, scope=None):
    """Run one step of the cell: combine the input with the previous state.

    The new state is ``sigmoid(inputs * W_x + state * W_h + b)`` and is
    also returned as the cell output.

    Args:
        inputs: the input vector of size [None, self.input_size].
        state: the previous state vector of size [None, self.state_size].
        scope: name of the variable scope in which the cell variables are
            defined; falls back to the class name when empty.

    Returns:
        A pair ``(output, new_state)``; both entries are the same tensor.
    """
    if not scope:
        scope = type(self).__name__

    with tf.variable_scope(scope):
        input_weights = tf.get_variable(
            'W_x',
            shape=[self.input_size, self.state_size],
            dtype=tf.float32,
            initializer=xavier_weight_init())
        recurrent_weights = tf.get_variable(
            'W_h',
            shape=[self.state_size, self.state_size],
            dtype=tf.float32,
            initializer=xavier_weight_init())
        bias = tf.get_variable(
            'b',
            shape=[self.state_size],
            dtype=tf.float32,
            initializer=tf.constant_initializer(0.0))

        pre_activation = (tf.matmul(inputs, input_weights) +
                          tf.matmul(state, recurrent_weights) + bias)
        new_state = tf.sigmoid(pre_activation)

    # A plain RNN cell exposes its state as its output.
    return new_state, new_state
def add_prediction_op(self):
    """Build the one-hidden-layer window classifier.

        h      = ReLU(x W + b1)
        h_drop = Dropout(h, keep_prob)
        pred   = h_drop U + b2

    ``W``, ``b1``, ``U`` and ``b2`` are stored on ``self``. The weight
    matrices use the Xavier initializer and the biases start at zero.

    Returns:
        pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x = self.add_embedding()

    # Take the keep probability from the placeholder instead of baking the
    # constant `1 - config.dropout` into the graph; with a constant, dropout
    # stays active at prediction time as well.
    # NOTE(review): assumes the placeholder is fed the *keep* probability
    # (1.0 at inference) -- confirm against create_feed_dict.
    keep_prob = self.dropout_placeholder

    weight_initializer = xavier_weight_init()
    input_size = self.config.n_window_features * self.config.embed_size

    self.W = tf.Variable(
        weight_initializer((input_size, self.config.hidden_size)))
    self.b1 = tf.Variable(tf.zeros([self.config.hidden_size], tf.float32))
    self.U = tf.Variable(
        weight_initializer(
            (self.config.hidden_size, self.config.n_classes)))
    self.b2 = tf.Variable(tf.zeros([self.config.n_classes], tf.float32))

    h = tf.nn.relu(tf.matmul(x, self.W) + self.b1)
    h_drop = tf.nn.dropout(h, keep_prob)
    pred = tf.matmul(h_drop, self.U) + self.b2
    return pred
def add_embeddings(self):
    """Create the word, tag and deprel embeddings and look up the inputs.

    Three embedding matrices are created as variables, one per input type,
    with ``n_word_ids`` / ``n_tag_ids`` / ``n_deprel_ids`` rows and
    ``embed_size`` columns:

    - the word matrix is initialized from ``self.word_embeddings``;
    - the tag and deprel matrices use the Xavier initializer.

    The ids held by the word / tag / deprel placeholders index the rows of
    the matching matrix, and each lookup result is reshaped to
    (None, n * embed_size), where n is the number of features of that type.

    Returns:
        word_embeddings : tf.Tensor of shape
            (None, n_word_features * embed_size)
        tag_embeddings : tf.Tensor of shape
            (None, n_tag_features * embed_size)
        deprel_embeddings : tf.Tensor of shape
            (None, n_deprel_features * embed_size)
    """
    xavier = xavier_weight_init()
    cfg = self.config
    dim = cfg.embed_size

    # Embedding tables (variables, so they are trained with the model).
    word_table = tf.get_variable(
        "word_embedding_matrix",
        [cfg.n_word_ids, dim],
        initializer=tf.constant_initializer(self.word_embeddings))
    tag_table = tf.get_variable(
        "tag_embedding_matrix",
        [cfg.n_tag_ids, dim],
        initializer=xavier)
    deprel_table = tf.get_variable(
        "deprel_embedding_matrix",
        [cfg.n_deprel_ids, dim],
        initializer=xavier)

    # Row lookups for the ids supplied through the placeholders.
    word_vectors = tf.nn.embedding_lookup(word_table,
                                          self.word_id_placeholder)
    tag_vectors = tf.nn.embedding_lookup(tag_table,
                                         self.tag_id_placeholder)
    deprel_vectors = tf.nn.embedding_lookup(deprel_table,
                                            self.deprel_id_placeholder)

    # Concatenate the per-feature vectors of each example into one row.
    word_embeddings = tf.reshape(
        word_vectors, [-1, cfg.n_word_features * dim])
    tag_embeddings = tf.reshape(
        tag_vectors, [-1, cfg.n_tag_features * dim])
    deprel_embeddings = tf.reshape(
        deprel_vectors, [-1, cfg.n_deprel_features * dim])

    return word_embeddings, tag_embeddings, deprel_embeddings
def add_prediction_op(self):
    """Build a feed-forward network with four hidden layers.

        layer_1 = ReLU(x W1 + b1)              # width hidden_size_1
        layer_2 = ReLU(layer_1 W2 + b2)        # width hidden_size_2
        layer_3 = ReLU(layer_2 W3 + b3)        # width hidden_size_2
        layer_4 = softmax(layer_3 W4 + b4)     # width hidden_size_2
        pred    = softmax(layer_4 U + b5)      # width n_classes

    Weight matrices use the Xavier initializer; biases start at zero.

    Returns:
        pred: tf.Tensor with n_classes columns, the softmax of the output
            layer scores.
    """
    x = self.add_embedding()
    xavier_initializer = xavier_weight_init()

    input_size = self.config.n_window_features * self.config.embed_size
    hidden_1 = self.config.hidden_size_1
    hidden_2 = self.config.hidden_size_2
    n_classes = self.config.n_classes

    W1 = tf.Variable(xavier_initializer([input_size, hidden_1]))
    b1 = tf.Variable(tf.zeros([hidden_1], dtype=tf.float32))
    U = tf.Variable(xavier_initializer([hidden_2, n_classes]))
    W2 = tf.Variable(xavier_initializer([hidden_1, hidden_2]))
    b2 = tf.Variable(tf.zeros([hidden_2], dtype=tf.float32))
    # Layers 3 and 4 consume activations of width hidden_size_2, so their
    # weight matrices must have hidden_size_2 rows. Using hidden_size_1 rows
    # only worked when both hidden sizes happened to be equal.
    W3 = tf.Variable(xavier_initializer([hidden_2, hidden_2]))
    b3 = tf.Variable(tf.zeros([hidden_2], dtype=tf.float32))
    W4 = tf.Variable(xavier_initializer([hidden_2, hidden_2]))
    b4 = tf.Variable(tf.zeros([hidden_2], dtype=tf.float32))
    b5 = tf.Variable(tf.zeros([n_classes], dtype=tf.float32))

    layer_1 = tf.add(tf.matmul(x, W1), b1)
    layer_1 = tf.nn.relu(layer_1)
    layer_2 = tf.add(tf.matmul(layer_1, W2), b2)
    layer_2 = tf.nn.relu(layer_2)
    layer_3 = tf.add(tf.matmul(layer_2, W3), b3)
    layer_3 = tf.nn.relu(layer_3)
    layer_4 = tf.add(tf.matmul(layer_3, W4), b4)
    # NOTE(review): layer 4 uses softmax rather than ReLU as its
    # activation; kept unchanged -- confirm this is intended.
    layer_4 = tf.nn.softmax(layer_4)

    # Unnormalised class scores of the output layer.
    distrib = tf.add(tf.matmul(layer_4, U), b5)
    # NOTE(review): if the loss op applies a softmax cross-entropy to this
    # return value, softmax ends up applied twice -- confirm against the
    # loss op.
    pred = tf.nn.softmax(distrib)
    return pred
def add_prediction_op(self):
    """Build the one-hidden-layer classifier.

        h      = ReLU(x W + b1)
        h_drop = Dropout(h, 1 - dropout_placeholder)
        pred   = h_drop U + b2

    ``W`` is stored on ``self``. Weight matrices use the Xavier
    initializer; biases start at zero.

    Returns:
        pred: tf.Tensor with n_classes columns (unnormalised scores).
    """
    x = self.add_embedding()
    init = xavier_weight_init()

    with tf.variable_scope("transformation"):
        b1 = tf.Variable(tf.zeros([self.config.hidden_size]))
        b2 = tf.Variable(tf.zeros([self.config.n_classes]))
        # Wrap the initial values in tf.Variable: a bare initializer output
        # is not a trainable, persistent parameter.
        # NOTE(review): assumes xavier_weight_init() returns a function
        # producing a plain tensor -- confirm against its definition.
        self.W = tf.Variable(
            init([
                self.config.n_features * self.config.embed_size,
                self.config.hidden_size
            ]))
        U = tf.Variable(
            init([self.config.hidden_size, self.config.n_classes]))

        h = tf.nn.relu(tf.matmul(x, self.W) + b1)
        # The placeholder is treated as the drop probability here, so the
        # keep probability passed to tf.nn.dropout is its complement.
        h_drop = tf.nn.dropout(h, 1 - self.dropout_placeholder)
        pred = tf.matmul(h_drop, U) + b2

    return pred
def add_prediction_op(self):
    """Adds the single-hidden-layer neural network:

        h = Relu(x_w W_w + x_t W_t + x_d W_d + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_drop U + b2

    No softmax is applied to pred; it is left to add_loss_op, which can
    then use tf.nn.softmax_cross_entropy_with_logits. Leaving the softmax
    out of the predictions does not change the predicted transition.

    W_w, W_t, W_d and U use the Xavier initializer from initialization.py;
    b1 and b2 are initialized with zeros. Variable dimensions:
        W_w : (n_word_features * embed_size, hidden_size)
        W_t : (n_tag_features * embed_size, hidden_size)
        W_d : (n_deprel_features * embed_size, hidden_size)
        b1: (hidden_size,)
        U:  (hidden_size, n_classes)
        b2: (n_classes,)

    self.dropout_placeholder is passed to tf.nn.dropout directly.

    Returns:
        pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x_w, x_t, x_d = self.add_embeddings()

    xavier_initialization = xavier_weight_init()
    embed_size = self.config.embed_size
    hidden_size = self.config.hidden_size
    n_classes = self.config.n_classes

    W_w = tf.Variable(
        xavier_initialization(
            (self.config.n_word_features * embed_size, hidden_size)))
    W_t = tf.Variable(
        xavier_initialization(
            (self.config.n_tag_features * embed_size, hidden_size)))
    W_d = tf.Variable(
        xavier_initialization(
            (self.config.n_deprel_features * embed_size, hidden_size)))
    U = tf.Variable(xavier_initialization((hidden_size, n_classes)))
    b1 = tf.Variable(tf.zeros((hidden_size,)))
    b2 = tf.Variable(tf.zeros((n_classes,)))

    h = tf.nn.relu(
        tf.matmul(x_w, W_w) + tf.matmul(x_t, W_t) + tf.matmul(x_d, W_d) +
        b1)
    h_drop = tf.nn.dropout(h, self.dropout_placeholder)

    # pred is computed directly. The previous placeholder assignment
    # `tf.Variable((batch_size, n_classes))` only added a stray integer
    # variable to the graph before being overwritten.
    pred = tf.matmul(h_drop, U) + b2
    return pred
def add_embeddings(self):
    """Create the three embedding matrices and look up the fed ids.

    Embedding layers turn (sparse) integer ids into dense float vectors.
    One matrix is created per input type (word, tag, deprel) as a
    variable:

    - the word matrix is initialized with ``self.word_embeddings``;
    - the tag and deprel matrices have shape (n_tag_ids, embed_size) and
      (n_deprel_ids, embed_size) and use the Xavier initializer.

    The ids provided by the word / tag / deprel placeholders select rows
    of the matching matrix, and each lookup result is then reshaped to
    (None, n * embed_size), where n is one of
    {n_word_features, n_tag_features, n_deprel_features}.

    Returns:
        word_embeddings : tf.Tensor of type tf.float32 and shape
            (None, n_word_features * embed_size)
        tag_embeddings : tf.float32 (None, n_tag_features * embed_size)
        deprel_embeddings : tf.float32
            (None, n_deprel_features * embed_size)
    """
    embed_size = self.config.embed_size

    # Word embeddings start from the supplied pre-trained matrix.
    word_matrix = tf.Variable(self.word_embeddings, dtype=tf.float32)
    word_lookup = tf.nn.embedding_lookup(word_matrix,
                                         self.word_id_placeholder)

    xavier_initializer = xavier_weight_init()

    # Part-of-speech tag embeddings.
    tag_init = xavier_initializer((self.config.n_tag_ids, embed_size))
    tag_matrix = tf.Variable(tag_init, dtype=tf.float32)
    tag_lookup = tf.nn.embedding_lookup(tag_matrix,
                                        self.tag_id_placeholder)

    # Arc-label (dependency relation) embeddings.
    deprel_init = xavier_initializer(
        (self.config.n_deprel_ids, embed_size))
    deprel_matrix = tf.Variable(deprel_init, dtype=tf.float32)
    deprel_lookup = tf.nn.embedding_lookup(deprel_matrix,
                                           self.deprel_id_placeholder)

    # Flatten every example's feature vectors into a single row.
    word_embeddings = tf.reshape(
        word_lookup, [-1, self.config.n_word_features * embed_size])
    tag_embeddings = tf.reshape(
        tag_lookup, [-1, self.config.n_tag_features * embed_size])
    deprel_embeddings = tf.reshape(
        deprel_lookup, [-1, self.config.n_deprel_features * embed_size])

    return word_embeddings, tag_embeddings, deprel_embeddings
def add_prediction_op(self):
    """Build the prediction network, with optional extra hidden layers.

    First hidden layer:

        z = x_w W_w + x_t W_t + x_d W_d + b1
        h = z ** 3      if FLAGS.activation == 'cube'
        h = Relu(z)     otherwise

    Dropout (keep probability self.dropout_placeholder) is applied to h.
    When FLAGS.hidden is non-zero, that many additional ReLU layers of
    width hidden_size follow before the output projection:

        pred = last_layer U + b2

    No softmax is applied to pred. When self.config.l2_beta is set, the
    L2 losses of all weights and biases created here are added to
    self.config.l2_loss. Progress messages describing the chosen setup
    are printed.

    Variable dimensions:
        W_w : (n_word_features * embed_size, hidden_size)
        W_t : (n_tag_features * embed_size, hidden_size)
        W_d : (n_deprel_features * embed_size, hidden_size)
        b1: (hidden_size,)
        U:  (hidden_size, n_classes)
        b2: (n_classes)

    Returns:
        pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x_w, x_t, x_d = self.add_embeddings()

    init = xavier_weight_init()
    embed_size = self.config.embed_size
    hidden_size = self.config.hidden_size

    W_w = tf.Variable(
        init((self.config.n_word_features * embed_size, hidden_size)))
    W_t = tf.Variable(
        init((self.config.n_tag_features * embed_size, hidden_size)))
    W_d = tf.Variable(
        init((self.config.n_deprel_features * embed_size, hidden_size)))
    b1 = tf.Variable(tf.zeros((hidden_size,)))
    U = tf.Variable(init((hidden_size, self.config.n_classes)))
    b2 = tf.Variable(tf.zeros(self.config.n_classes))

    pre_activation = (tf.matmul(x_w, W_w) + tf.matmul(x_t, W_t) +
                      tf.matmul(x_d, W_d) + b1)

    activation_msg = "\n\t" + FLAGS.activation + " activation function"
    print(activation_msg)

    # First hidden layer: cube activation on request, ReLU otherwise.
    if FLAGS.activation == 'cube':
        h = tf.pow(pre_activation, tf.constant(3, dtype=tf.float32))
    else:
        h = tf.nn.relu(pre_activation)

    # L2 penalty over the first-layer and output-layer parameters.
    if self.config.l2_beta:
        print("\tl2 regularization with beta " + str(self.config.l2_beta))
        self.config.l2_loss += tf.nn.l2_loss(W_w) + tf.nn.l2_loss(W_t) + \
            tf.nn.l2_loss(W_d) + tf.nn.l2_loss(b1) + \
            tf.nn.l2_loss(U) + tf.nn.l2_loss(b2)

    layers_msg = ("\t" + str(FLAGS.hidden + 1) +
                  " hidden layer(s) with size " + str(FLAGS.hidden_size))
    print(layers_msg)

    if not FLAGS.hidden:
        # Single hidden layer: dropout followed by the output projection.
        h_drop = tf.nn.dropout(h, self.dropout_placeholder)
        return tf.matmul(h_drop, U) + b2

    # Parameters of the additional hidden layers, keyed by layer index.
    extra_w, extra_b = {}, {}
    for idx in range(FLAGS.hidden):
        extra_w[idx] = tf.Variable(init((hidden_size, hidden_size)))
        extra_b[idx] = tf.Variable(tf.random_normal([hidden_size]))

    # Dropout is applied once, before the additional hidden layers.
    h_drop = tf.nn.dropout(h, self.dropout_placeholder)
    layer = tf.nn.relu(tf.matmul(h_drop, extra_w[0]) + extra_b[0])
    for idx in range(1, FLAGS.hidden):
        layer = tf.nn.relu(tf.matmul(layer, extra_w[idx]) + extra_b[idx])
    pred = tf.matmul(layer, U) + b2

    # L2 penalty over the additional hidden-layer parameters.
    if self.config.l2_beta:
        for idx in range(FLAGS.hidden):
            self.config.l2_loss += (tf.nn.l2_loss(extra_w[idx]) +
                                    tf.nn.l2_loss(extra_b[idx]))

    return pred
def add_prediction_op(self):
    """Adds the unrolled RNN:

        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    U uses the Xavier initializer and b2 starts at zero. tf.nn.dropout
    takes the keep probability (1 - p_drop) as its argument; the keep
    probability is the value of self.dropout_placeholder.

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)

    Raises:
        ValueError: if self.config.cell is neither "rnn" nor "gru".
    """
    x = self.add_embedding()
    dropout_rate = self.dropout_placeholder

    preds = []  # Predicted output at each timestep.

    if self.config.cell == "rnn":
        cell = RNNCell(Config.n_features * Config.embed_size,
                       Config.hidden_size)
    elif self.config.cell == "gru":
        cell = GRUCell(Config.n_features * Config.embed_size,
                       Config.hidden_size)
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)

    U = tf.get_variable(
        "U",
        shape=[self.config.hidden_size, self.config.n_classes],
        dtype=tf.float32,
        initializer=xavier_weight_init())
    b2 = tf.get_variable(
        "b2",
        shape=[self.config.n_classes],
        dtype=tf.float32,
        initializer=tf.constant_initializer(0.0))

    # Initial state: one zero row per example in the batch.
    h_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size],
                   dtype=tf.float32)

    # Swap the first two axes so that x[time_step] selects one timestep.
    x = tf.transpose(x, [1, 0, 2])

    with tf.variable_scope("RNN"):
        for time_step in range(self.max_length):
            if time_step > 0:
                # Every timestep shares the cell variables created at
                # step 0.
                tf.get_variable_scope().reuse_variables()
            o_t, h_t = cell(x[time_step], h_t)
            # Use the placeholder as keep probability rather than the
            # constant `1 - config.dropout`, so the value fed at run time
            # controls dropout.
            o_drop_t = tf.nn.dropout(o_t, dropout_rate)
            y_t = tf.matmul(o_drop_t, U) + b2
            preds.append(y_t)

    # Stack the per-timestep outputs and swap the first two axes back.
    preds = tf.transpose(tf.stack(preds), [1, 0, 2])

    assert preds.get_shape().as_list() == [
        None, self.max_length, self.config.n_classes
    ], "predictions are not of the right shape. Expected {}, got {}".format(
        [None, self.max_length, self.config.n_classes],
        preds.get_shape().as_list())
    return preds
def add_prediction_op(self):
    """Adds the single-hidden-layer neural network:

        h = Relu(x_w W_w + x_t W_t + x_d W_d + b1)
        h_drop = Dropout(h, dropout_rate)
        pred = h_drop U + b2

    No softmax is applied to pred; it is left to add_loss_op, which can
    then use tf.nn.softmax_cross_entropy_with_logits. Leaving the softmax
    out of the predictions does not change the predicted transition.

    W_w, W_t, W_d and U use the Xavier initializer from initialization.py;
    b1 and b2 are initialized with zeros. Variable dimensions:
        W_w : (n_word_features * embed_size, hidden_size)
        W_t : (n_tag_features * embed_size, hidden_size)
        W_d : (n_deprel_features * embed_size, hidden_size)
        b1: (hidden_size,)
        U:  (hidden_size, n_classes)
        b2: (n_classes)

    self.dropout_placeholder is passed to tf.nn.dropout directly.

    Returns:
        pred: tf.Tensor of shape (batch_size, n_classes)
    """
    x_w, x_t, x_d = self.add_embeddings()

    # A two-hidden-layer variant guarded by config.addHidden2 used to sit
    # here as disabled code; only the single-layer model is built.
    xavier = xavier_weight_init()
    zeros = tf.zeros_initializer
    embed_size = self.config.embed_size
    hidden_size = self.config.hidden_size
    n_classes = self.config.n_classes

    word_weights = tf.get_variable(
        "W_w",
        [self.config.n_word_features * embed_size, hidden_size],
        initializer=xavier)
    tag_weights = tf.get_variable(
        "W_t",
        [self.config.n_tag_features * embed_size, hidden_size],
        initializer=xavier)
    deprel_weights = tf.get_variable(
        "W_d",
        [self.config.n_deprel_features * embed_size, hidden_size],
        initializer=xavier)
    hidden_bias = tf.get_variable("b1", [hidden_size], initializer=zeros())
    output_bias = tf.get_variable("b2", [n_classes], initializer=zeros())
    output_weights = tf.get_variable(
        "U", [hidden_size, n_classes], initializer=xavier)

    pre_activation = (tf.matmul(x_w, word_weights) +
                      tf.matmul(x_t, tag_weights) +
                      tf.matmul(x_d, deprel_weights) + hidden_bias)
    hidden = tf.nn.relu(pre_activation, name="relu")
    hidden_dropped = tf.nn.dropout(
        hidden, self.dropout_placeholder, name="dropout")

    return tf.matmul(hidden_dropped, output_weights) + output_bias
def add_model(self, window):
    """Adds the 1-hidden-layer NN.

    The network is built in two variable scopes, both using the Xavier
    initializer from xavier_weight_init:

    - 'Layer1': h = sigmoid(window W + b1)
    - 'Output_Layer': y = h U + b2

    Dropout with keep probability self.dropout_placeholder is applied to y
    after both scopes. When self.config.l2 is set, an L2 penalty
    (0.5 * l2 * l2_loss) for W and for U is added to the 'total_loss'
    graph collection, so that the loss op can add the regularization
    terms to the cost function.

    Variable dimensions:
        W:  (window_size*embed_size, hidden_size)
        b1: (hidden_size,)
        U:  (hidden_size, label_size)
        b2: (label_size)

    https://www.tensorflow.org/versions/r0.7/api_docs/python/framework.html#graph-collections

    Args:
        window: tf.Tensor of shape (-1, window_size*embed_size)
    Returns:
        output: tf.Tensor of shape (batch_size, label_size)
    """
    config = self.config
    input_size = config.window_size * config.embed_size

    # Hidden layer: every variable in this scope is Xavier-initialized.
    with tf.variable_scope('Layer1', initializer=xavier_weight_init()):
        hidden_weights = tf.get_variable(
            'W', [input_size, config.hidden_size])
        hidden_bias = tf.get_variable('b1', [config.hidden_size])
        hidden = tf.nn.sigmoid(
            tf.matmul(window, hidden_weights) + hidden_bias)
        if config.l2:
            # L2 regularization term for W.
            hidden_penalty = 0.5 * config.l2 * tf.nn.l2_loss(hidden_weights)
            tf.add_to_collection('total_loss', hidden_penalty)

    # Linear transformation that precedes the softmax.
    with tf.variable_scope('Output_Layer',
                           initializer=xavier_weight_init()):
        output_weights = tf.get_variable(
            'U', [config.hidden_size, config.label_size])
        output_bias = tf.get_variable('b2', [config.label_size])
        scores = tf.matmul(hidden, output_weights) + output_bias
        if config.l2:
            # L2 regularization term for U.
            output_penalty = 0.5 * config.l2 * tf.nn.l2_loss(output_weights)
            tf.add_to_collection('total_loss', output_penalty)

    # Dropout is applied once, after both variable scopes.
    output = tf.nn.dropout(scores, self.dropout_placeholder)
    return output