def __init__(self,
             num_embed_units,  # pretrained wordvec size
             num_units,        # RNN units size
             num_layers,       # number of RNN layers
             num_vocabs,       # vocabulary size
             wordvec,          # pretrained wordvec matrix
             dataloader):      # dataloader
    super().__init__()

    # load pretrained wordvec
    self.wordvec = wordvec
    # the dataloader
    self.dataloader = dataloader

    # TODO START
    # fill the parameters for the multi-layer RNN
    self.cells = nn.Sequential(
        RNNCell(num_embed_units, num_units),
        *[RNNCell(num_units, num_units) for _ in range(num_layers - 1)]
    )
    # TODO END

    # initialize other layers
    self.linear = nn.Linear(num_units, num_vocabs)
def __init__(
        self,
        num_embed_units,  # pretrained wordvec size
        num_units,        # RNN units size
        num_layers,       # number of RNN layers
        num_vocabs,       # vocabulary size
        wordvec,          # pretrained wordvec matrix
        dataloader,       # dataloader
        cell_type="GRU",  # cell type
        layer_norm=False,
        residual=False,
):
    super().__init__()

    # load pretrained wordvec
    self.num_vocabs = num_vocabs
    self.num_units = num_units
    self.wordvec = nn.Embedding.from_pretrained(wordvec)
    # the dataloader
    self.dataloader = dataloader

    # TODO START
    # fill the parameters for the multi-layer RNN
    assert num_layers >= 1
    if cell_type == "RNN":
        self.cells = nn.Sequential(
            RNNCell(num_embed_units, num_units),
            *[RNNCell(num_units, num_units) for _ in range(num_layers - 1)])
    elif cell_type == "GRU":
        self.cells = nn.Sequential(
            GRUCell(num_embed_units, num_units),
            *[GRUCell(num_units, num_units) for _ in range(num_layers - 1)])
    elif cell_type == "LSTM":
        self.cells = nn.Sequential(
            LSTMCell(num_embed_units, num_units),
            *[LSTMCell(num_units, num_units) for _ in range(num_layers - 1)])
    else:
        raise NotImplementedError("Unknown cell type: " + cell_type)
    self.cell_type = cell_type
    # TODO END

    # initialize other layers
    self.linear = nn.Linear(num_units, num_vocabs)
    self.layer_norms = nn.Sequential(
        nn.LayerNorm(num_embed_units),
        *[nn.LayerNorm(num_units) for _ in range(num_layers - 1)]
    ) if layer_norm else None
    self.residual = residual
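# A minimal forward-step sketch of how the modules built above would typically
# combine for ONE timestep: each layer's cell consumes the previous layer's
# output, with an optional pre-cell LayerNorm and a residual connection between
# layers. torch.nn.GRUCell is assumed as a stand-in for the assignment's own
# GRUCell, and all sizes are arbitrary; this is illustrative only, not the
# assignment's reference forward().
import torch
import torch.nn as nn

num_embed_units, num_units, num_layers, num_vocabs, batch_size = 300, 128, 2, 1000, 4
cells = nn.ModuleList([nn.GRUCell(num_embed_units, num_units)] +
                      [nn.GRUCell(num_units, num_units) for _ in range(num_layers - 1)])
layer_norms = nn.ModuleList([nn.LayerNorm(num_embed_units)] +
                            [nn.LayerNorm(num_units) for _ in range(num_layers - 1)])
linear = nn.Linear(num_units, num_vocabs)
residual = True

embed = torch.randn(batch_size, num_embed_units)                   # one timestep of embeddings
hiddens = [torch.zeros(batch_size, num_units) for _ in range(num_layers)]

layer_input = embed
for i, cell in enumerate(cells):
    normed = layer_norms[i](layer_input)                           # optional LayerNorm
    hiddens[i] = cell(normed, hiddens[i])                          # update layer i's hidden state
    out = hiddens[i]
    if residual and layer_input.shape == out.shape:                # residual only when sizes match
        out = out + layer_input
    layer_input = out                                              # feed the next layer
logits = linear(layer_input)                                       # (batch_size, num_vocabs)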
def add_prediction_op(self):
    """Runs an rnn on the input using TensorFlow's @tf.nn.dynamic_rnn function,
    and returns the final state as a prediction.

    TODO:
        - Call tf.nn.dynamic_rnn using @cell below. See:
          https://www.tensorflow.org/api_docs/python/nn/recurrent_neural_networks
        - Apply a sigmoid transformation on the final state to normalize the
          inputs between 0 and 1.

    Returns:
        preds: tf.Tensor of shape (batch_size, 1)
    """
    # Pick out the cell to use here.
    if self.config.cell == "rnn":
        cell = RNNCell(1, 1)
    elif self.config.cell == "gru":
        cell = GRUCell(1, 1)
    elif self.config.cell == "lstm":
        cell = tf.nn.rnn_cell.LSTMCell(1)
    else:
        raise ValueError("Unsupported cell type.")

    x = self.inputs_placeholder
    ### YOUR CODE HERE (~2-3 lines)
    _, state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    if isinstance(state, tf.nn.rnn_cell.LSTMStateTuple):
        state = state.h  # an LSTM's final state is a (c, h) tuple; use h
    preds = tf.nn.sigmoid(state)
    ### END YOUR CODE
    return preds
def add_prediction_op(self):
    """Runs an rnn on the input.

    Returns:
        preds: tf.Tensor of shape (batch_size, 1)
    """
    # Pick out the cell to use here.
    if self.config.cell == "rnn":
        cell = RNNCell(1, 1)
    elif self.config.cell == "gru":
        cell = GRUCell(1, 1)
    elif self.config.cell == "lstm":
        cell = tf.nn.rnn_cell.LSTMCell(1)
    else:
        raise ValueError("Unsupported cell type.")

    x = self.inputs_placeholder
    # Run the RNN over the whole sequence and use the last timestep's output,
    # squashed through a sigmoid, as the prediction.
    outputs, _ = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    outputs = tf.transpose(outputs, [1, 0, 2])  # (max_time, batch_size, 1)
    preds = tf.sigmoid(outputs[-1])             # (batch_size, 1)
    return preds
def __init__(
        self,
        num_embed_units,  # pretrained wordvec size
        num_units,        # RNN units size
        num_layers,       # number of RNN layers
        num_vocabs,       # vocabulary size
        wordvec,          # pretrained wordvec matrix [len(vocab_list), n_dims]
        dataloader,       # dataloader
        cell_kind):
    super().__init__()

    # load pretrained wordvec
    self.wordvec = wordvec
    # the dataloader
    self.dataloader = dataloader

    # TODO START
    self.num_vocabs = num_vocabs
    self.cross_entropy = nn.CrossEntropyLoss()
    # fill the parameters for the multi-layer RNN
    self.cell_kind = cell_kind
    if self.cell_kind == "RNN":
        self.cells = nn.Sequential(
            RNNCell(num_embed_units, num_units),
            *[RNNCell(num_units, num_units) for _ in range(num_layers - 1)])
    elif self.cell_kind == "GRU":
        self.cells = nn.Sequential(
            GRUCell(num_embed_units, num_units),
            *[GRUCell(num_units, num_units) for _ in range(num_layers - 1)])
    elif self.cell_kind == "LSTM":
        self.cells = nn.Sequential(
            LSTMCell(num_embed_units, num_units),
            *[LSTMCell(num_units, num_units) for _ in range(num_layers - 1)])
    else:
        raise NotImplementedError("Unknown cell kind: " + cell_kind)
    # TODO END

    # initialize other layers
    self.linear = nn.Linear(num_units, num_vocabs)
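# A loss-use sketch, assuming the usual language-model training step: the
# `self.linear` projection and `self.cross_entropy` criterion created above
# map every timestep's hidden state to vocabulary logits and compare them with
# the target tokens. The shapes and the ignore_index for padding are
# illustrative assumptions, not taken from the original code.
import torch
import torch.nn as nn

batch_size, seq_len, num_units, num_vocabs = 4, 7, 128, 1000
hidden = torch.randn(batch_size, seq_len, num_units)           # RNN outputs per timestep
targets = torch.randint(0, num_vocabs, (batch_size, seq_len))  # gold next-token ids

linear = nn.Linear(num_units, num_vocabs)
cross_entropy = nn.CrossEntropyLoss(ignore_index=0)            # assume PAD id 0

logits = linear(hidden)                                         # (batch, seq, vocab)
loss = cross_entropy(logits.reshape(-1, num_vocabs), targets.reshape(-1))
loss.backward()                                                 # gradients for one training step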
def add_prediction_op_rnn(self, x, dropout_rate):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    TODO: There are quite a few things you'll need to do in this function:
        - Define the variables U, b_2.
        - Define the vector h as a constant and initialize it with zeros.
          See tf.zeros and tf.shape for information on how to initialize this
          variable to be of the right shape.
          https://www.tensorflow.org/api_docs/python/tf/zeros
          https://www.tensorflow.org/api_docs/python/tf/shape
        - In a for loop, begin to unroll the RNN sequence. Collect the
          predictions in a list.
        - When unrolling the loop, from the second iteration onwards, you will
          HAVE to call tf.get_variable_scope().reuse_variables() so that you do
          not create new variables in the RNN cell. See
          https://www.tensorflow.org/api_guides/python/state_ops#Sharing_Variables
        - Concatenate and reshape the predictions into a predictions tensor.
    Hint: You will find the function tf.stack (similar to np.asarray) useful to
          assemble a list of tensors into a larger tensor.
          https://www.tensorflow.org/api_docs/python/tf/stack
    Hint: You will find the function tf.transpose and the perms argument useful
          to shuffle the indices of the tensor.
          https://www.tensorflow.org/api_docs/python/tf/transpose

    Remember:
        * Use the xavier initialization for matrices.
        * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an
          argument. The keep probability should be set to the value of
          self.dropout_placeholder

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    cell = RNNCell(Config.n_features * Config.embed_size, Config.hidden_size)

    preds = []  # Predicted output at each timestep should go here!

    # Define U and b2 as variables.
    # Initialize state as vector of zeros.
    ### YOUR CODE HERE (~4-6 lines)
    U = tf.get_variable('U',
                        shape=[self.config.hidden_size, self.config.n_classes],
                        initializer=tf.contrib.layers.xavier_initializer())
    b_2 = tf.get_variable('b_2',
                          shape=[self.config.n_classes],
                          initializer=tf.constant_initializer(0))
    h = tf.zeros((tf.shape(x)[0], self.config.hidden_size))
    ### END YOUR CODE

    with tf.variable_scope("RNN"):
        for time_step in range(self.max_length):
            ### YOUR CODE HERE (~6-10 lines)
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            o, h = cell(x[:, time_step, :], h)
            o_drop = tf.nn.dropout(o, dropout_rate)
            y = tf.matmul(o_drop, U) + b_2
            preds.append(y)
            ### END YOUR CODE

    # Make sure to reshape @preds here.
    ### YOUR CODE HERE (~2-4 lines)
    preds = tf.stack(preds, axis=1)
    ### END YOUR CODE
    return preds
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    TODO: There are quite a few things you'll need to do in this function:
        - Define the variables U, b_2.
        - Define the vector h as a constant and initialize it with zeros.
          See tf.zeros and tf.shape for information on how to initialize this
          variable to be of the right shape.
          https://www.tensorflow.org/api_docs/python/constant_op/constant_value_tensors#zeros
          https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#shape
        - In a for loop, begin to unroll the RNN sequence. Collect the
          predictions in a list.
        - When unrolling the loop, from the second iteration onwards, you will
          HAVE to call tf.get_variable_scope().reuse_variables() so that you do
          not create new variables in the RNN cell. See
          https://www.tensorflow.org/versions/master/how_tos/variable_scope/
        - Concatenate and reshape the predictions into a predictions tensor.
    Hint: You will find the function tf.pack (similar to np.asarray) useful to
          assemble a list of tensors into a larger tensor.
          https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#pack
    Hint: You will find the function tf.transpose and the perms argument useful
          to shuffle the indices of the tensor.
          https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#transpose

    Remember:
        * Use the xavier initialization for matrices.
        * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an
          argument. The keep probability should be set to the value of
          self.dropout_placeholder

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    x = self.add_embedding()
    dropout_rate = self.dropout_placeholder

    if Config.cnn:
        repr_ = tf.expand_dims(tf.transpose(x, perm=[0, 2, 1]), -1)
        cnn_output1 = self.cnn_layer(repr_, Config.stepsize[0])
        cnn_output2 = self.cnn_layer(repr_, Config.stepsize[1])
        # cnn_output1_trunc = tf.slice(
        #     cnn_output1, [0, ] * 3,
        #     [-1, (self.max_length - Config.stepsize[1] + 1) // 2, Config.m])
        # assert cnn_output1_trunc.get_shape().as_list() == [
        #     None, (self.max_length - Config.stepsize[1] + 1) // 2, Config.m
        # ], "truncated results are not of the right shape. Expected {}, got {}".format(
        #     [None, (dim1 - Config.step_size[1] + 1) // 2, self.config.m],
        #     cnn_output1_trunc.get_shape().as_list())
        x = tf.concat([x, cnn_output1, cnn_output2], 2)
        # x = self.fully_connected_layer(x)

    preds = []  # Predicted output at each timestep should go here!

    # Use the cell defined below. For Q2, we will just be using the
    # RNNCell you defined, but for Q3, we will run this code again
    # with a GRU cell!
    if self.config.cell == "rnn":
        cell = RNNCell(Config.n_features * Config.embed_size, Config.hidden_size)
    elif self.config.cell == "gru":
        cell = GRUCell(Config.n_features * Config.embed_size, Config.hidden_size)
    elif self.config.cell == "lstm":
        rnn_inputs = tf.nn.dropout(x, dropout_rate)
        lstm_features = self.bilstm_layer(rnn_inputs)
        preds = self.project_layer(lstm_features, Config.n_classes)
        return preds
    else:
        raise ValueError("Unsupported cell type: " + self.config.cell)

    # Define U and b2 as variables.
    # Initialize state as vector of zeros.
    ### YOUR CODE HERE (~4-6 lines)
    self.U = tf.get_variable(
        'U', [Config.hidden_size, Config.n_classes],
        initializer=tf.contrib.layers.xavier_initializer())
    self.b2 = tf.get_variable(
        'b2', [Config.n_classes, ],
        initializer=tf.contrib.layers.xavier_initializer())
    h = tf.zeros([tf.shape(x)[0], Config.hidden_size])
    ### END YOUR CODE

    with tf.variable_scope("RNN"):
        for time_step in range(self.max_length):
            ### YOUR CODE HERE (~6-10 lines)
            if time_step >= 1:
                tf.get_variable_scope().reuse_variables()
            o, h = cell(x[:, time_step, :], h)
            o_drop = tf.nn.dropout(o, dropout_rate)
            curPred = tf.matmul(o_drop, self.U) + self.b2
            preds.append(curPred)
            ### END YOUR CODE

    # Make sure to reshape @preds here.
    ### YOUR CODE HERE (~2-4 lines)
    preds = tf.stack(preds)
    preds = tf.transpose(preds, perm=[1, 0, 2])
    preds = tf.reshape(preds, [-1, Config.max_length, Config.n_classes])
    ### END YOUR CODE

    assert preds.get_shape().as_list() == [
        None, self.max_length, Config.n_classes
    ], "predictions are not of the right shape. Expected {}, got {}".format(
        [None, self.max_length, self.config.n_classes],
        preds.get_shape().as_list())
    return preds
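# A short note on the dropout argument used above, assuming TF1 semantics as
# described in the docstring: tf.nn.dropout takes the KEEP probability as its
# second argument, and dropout_placeholder is documented as already holding
# (1 - p_drop), so it is passed straight through. Minimal sketch of that
# convention; the 0.8 feed value is only an example.
import tensorflow as tf

keep_prob = tf.placeholder(tf.float32, shape=[])   # holds 1 - p_drop
activations = tf.ones([4, 8])
dropped = tf.nn.dropout(activations, keep_prob)    # kept units scaled by 1 / keep_prob
with tf.Session() as sess:
    print(sess.run(dropped, feed_dict={keep_prob: 0.8}))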
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    TODO: There are quite a few things you'll need to do in this function:
        - Define the variables U, b_2.
        - Define the vector h as a constant and initialize it with zeros.
          See tf.zeros and tf.shape for information on how to initialize this
          variable to be of the right shape.
          https://www.tensorflow.org/api_docs/python/constant_op/constant_value_tensors#zeros
          https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#shape
        - In a for loop, begin to unroll the RNN sequence. Collect the
          predictions in a list.
        - When unrolling the loop, from the second iteration onwards, you will
          HAVE to call tf.get_variable_scope().reuse_variables() so that you do
          not create new variables in the RNN cell. See
          https://www.tensorflow.org/versions/master/how_tos/variable_scope/
        - Concatenate and reshape the predictions into a predictions tensor.
    Hint: You will find the function tf.pack (similar to np.asarray) useful to
          assemble a list of tensors into a larger tensor.
          https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#pack
    Hint: You will find the function tf.transpose and the perms argument useful
          to shuffle the indices of the tensor.
          https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#transpose

    Remember:
        * Use the xavier initialization for matrices.
        * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an
          argument. The keep probability should be set to the value of
          self.dropout_placeholder

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    x1, x2 = self.add_embedding()
    dropout_rate = self.dropout_placeholder

    # choose cell type
    if self.config.cell == "rnn":
        cell = RNNCell(self.config.embed_size, self.config.hidden_size)
    elif self.config.cell == "gru":
        cell = GRUCell(self.config.embed_size, self.config.hidden_size)
    elif self.config.cell == "lstm":
        cell = LSTMCell(self.config.embed_size, self.config.hidden_size)
    else:
        raise ValueError("Unsupported cell type: " + self.config.cell)

    # Initialize hidden states to zero vectors of shape (num_examples, hidden_size)
    h1 = tf.zeros((tf.shape(x1)[0], self.config.hidden_size), tf.float32)
    h2 = tf.zeros((tf.shape(x2)[0], self.config.hidden_size), tf.float32)

    with tf.variable_scope("RNN1") as scope:
        for time_step in range(self.helper.max_length):
            if time_step != 0:
                scope.reuse_variables()
            o1_t, h1 = cell(x1[:, time_step, :], h1, scope)
    with tf.variable_scope("RNN2") as scope:
        for time_step in range(self.helper.max_length):
            if time_step != 0:
                scope.reuse_variables()
            o2_t, h2 = cell(x2[:, time_step, :], h2, scope)

    # h_drop1 = tf.nn.dropout(h1, dropout_rate)
    # h_drop2 = tf.nn.dropout(h2, dropout_rate)

    # use L2-regularization: sum of squares of all parameters
    if self.config.distance_measure == "l2":
        # perform logistic regression on the l2-distance between h1 and h2
        distance = norm(h1 - h2 + 0.000001)
        logistic_a = tf.Variable(0.0, dtype=tf.float32, name="logistic_a")
        logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
        self.regularization_term = tf.square(logistic_a) + tf.square(logistic_b)
        preds = tf.sigmoid(logistic_a * distance + logistic_b)

    elif self.config.distance_measure == "cosine":
        # perform logistic regression on the cosine distance between h1 and h2
        distance = cosine_distance(h1 + 0.000001, h2 + 0.000001)
        logistic_a = tf.Variable(1.0, dtype=tf.float32, name="logistic_a")
        logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
        self.regularization_term = tf.square(logistic_a) + tf.square(logistic_b)
        preds = tf.sigmoid(logistic_a * distance + logistic_b)

    elif self.config.distance_measure == "custom_coef":
        # perform logistic regression on the vector |h1 - h2|, which is
        # equivalent to logistic regression on the (scalar) weighted Manhattan
        # distance between h1 and h2, i.e. a weighted sum of |h1 - h2|
        logistic_a = tf.get_variable("coef", [self.config.hidden_size],
                                     tf.float32,
                                     tf.contrib.layers.xavier_initializer())
        logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
        self.regularization_term = tf.reduce_sum(
            tf.square(logistic_a)) + tf.square(logistic_b)
        preds = tf.sigmoid(
            tf.reduce_sum(logistic_a * tf.abs(h1 - h2), axis=1) + logistic_b)

    elif self.config.distance_measure == "concat":
        # use softmax for prediction
        U = tf.get_variable("U",
                            (4 * self.config.hidden_size, self.config.n_classes),
                            tf.float32, tf.contrib.layers.xavier_initializer())
        b = tf.get_variable("b", (self.config.n_classes, ), tf.float32,
                            tf.constant_initializer(0))
        v = tf.nn.relu(tf.concat([h1, h2, tf.square(h1 - h2), h1 * h2], 1))
        self.regularization_term = tf.reduce_sum(tf.square(U)) + tf.reduce_sum(
            tf.square(b))
        preds = tf.matmul(v, U) + b

    elif self.config.distance_measure == "concat_steroids":
        # use softmax for prediction, with one extra hidden layer
        W1 = tf.get_variable("W1",
                             (4 * self.config.hidden_size, self.config.hidden_size),
                             tf.float32, tf.contrib.layers.xavier_initializer())
        b1 = tf.get_variable("b1", (self.config.hidden_size, ), tf.float32,
                             tf.constant_initializer(0))
        W2 = tf.get_variable("W2",
                             (self.config.hidden_size, self.config.n_classes),
                             tf.float32, tf.contrib.layers.xavier_initializer())
        b2 = tf.get_variable("b2", (self.config.n_classes, ), tf.float32,
                             tf.constant_initializer(0))
        v1 = tf.nn.relu(tf.concat([h1, h2, tf.square(h1 - h2), h1 * h2], 1))
        v2 = tf.nn.relu(tf.matmul(v1, W1) + b1)
        self.regularization_term = (tf.reduce_sum(tf.square(W1)) +
                                    tf.reduce_sum(tf.square(b1)) +
                                    tf.reduce_sum(tf.square(W2)) +
                                    tf.reduce_sum(tf.square(b2)))
        preds = tf.matmul(v2, W2) + b2

    else:
        raise ValueError("Unsupported distance type: " +
                         self.config.distance_measure)

    return preds
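# A loss sketch for the function above, assuming the usual add_loss_op step of
# this model template: the distance-based branches ("l2", "cosine",
# "custom_coef") return sigmoid probabilities, while the "concat" branches
# return unnormalized softmax logits, so the loss should match.
# labels_placeholder and reg_weight are hypothetical names used only for
# illustration, not taken from the original code.
def add_loss_op(self, preds):
    if self.config.distance_measure in ("l2", "cosine", "custom_coef"):
        # binary cross-entropy on probabilities
        loss = tf.losses.log_loss(labels=self.labels_placeholder,
                                  predictions=preds)
    else:
        # softmax cross-entropy on logits
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=self.labels_placeholder, logits=preds))
    # add the L2 penalty accumulated in add_prediction_op
    return loss + self.config.reg_weight * self.regularization_term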
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    Remember:
        * Use the xavier initialization for matrices.
        * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an
          argument. The keep probability should be set to the value of
          self.dropout_placeholder

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    x = self.add_embedding()
    dropout_rate = self.dropout_placeholder
    preds = []  # Predicted output at each timestep should go here!

    if self.config.cell == "rnn":
        cell = RNNCell(Config.n_features * Config.embed_size, Config.hidden_size)
    elif self.config.cell == "gru":
        cell = GRUCell(Config.n_features * Config.embed_size, Config.hidden_size)
    else:
        raise ValueError("Unsupported cell type: " + self.config.cell)

    U = tf.get_variable("U",
                        shape=[self.config.hidden_size, self.config.n_classes],
                        dtype=tf.float32,
                        initializer=xavier_weight_init())
    b2 = tf.get_variable("b2",
                         shape=[self.config.n_classes, ],
                         dtype=tf.float32,
                         initializer=tf.constant_initializer(0.0))
    h_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size], dtype=tf.float32)

    x = tf.transpose(x, [1, 0, 2])  # time-major: (max_length, batch, features)
    with tf.variable_scope("RNN"):
        for time_step in range(self.max_length):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            o_t, h_t = cell(x[time_step], h_t)
            # dropout_placeholder already holds the keep probability
            o_drop_t = tf.nn.dropout(o_t, dropout_rate)
            y_t = tf.matmul(o_drop_t, U) + b2
            preds.append(y_t)

    preds = tf.transpose(tf.stack(preds), [1, 0, 2])  # (batch, max_length, n_classes)

    assert preds.get_shape().as_list() == [
        None, self.max_length, self.config.n_classes
    ], "predictions are not of the right shape. Expected {}, got {}".format(
        [None, self.max_length, self.config.n_classes],
        preds.get_shape().as_list())
    return preds