def add_prediction_op(self):
    """Run the configured recurrent cell over the inputs and squash the final state.

    The cell is chosen from ``self.config.cell`` ("rnn", "gru" or "lstm"),
    unrolled over ``self.inputs_placeholder`` with ``tf.nn.dynamic_rnn``, and
    the final hidden state is passed through a sigmoid so that every
    prediction lies between 0 and 1.

    Returns:
        preds: tf.Tensor of shape (batch_size, 1)

    Raises:
        ValueError: if ``self.config.cell`` is not a supported cell type.
    """
    # Pick out the cell to use here.
    if self.config.cell == "rnn":
        cell = RNNCell(1, 1)
    elif self.config.cell == "gru":
        cell = GRUCell(1, 1)
    elif self.config.cell == "lstm":
        cell = tf.nn.rnn_cell.LSTMCell(1)
    else:
        raise ValueError("Unsupported cell type.")

    x = self.inputs_placeholder
    # dtype is required because no initial_state is supplied.
    _, state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

    # An LSTM cell reports its state as an LSTMStateTuple (c, h). Handing the
    # tuple straight to tf.sigmoid would pack both parts into a single tensor
    # with an extra leading axis, so keep only the hidden part.
    if isinstance(state, tf.nn.rnn_cell.LSTMStateTuple):
        state = state.h

    preds = tf.sigmoid(state)
    return preds
def add_prediction_op(self):
    """Run the configured recurrent cell over the inputs and squash the final state.

    The cell is chosen from ``self.config.cell`` ("rnn", "gru" or "lstm"),
    unrolled over ``self.inputs_placeholder`` with ``tf.nn.dynamic_rnn``, and
    the final hidden state is passed through a sigmoid so that every
    prediction lies between 0 and 1.

    Returns:
        preds: tf.Tensor of shape (batch_size, 1)

    Raises:
        ValueError: if ``self.config.cell`` is not a supported cell type.
    """
    # Pick out the cell to use here.
    if self.config.cell == "rnn":
        cell = RNNCell(1, 1)
    elif self.config.cell == "gru":
        cell = GRUCell(1, 1)
    elif self.config.cell == "lstm":
        cell = tf.nn.rnn_cell.LSTMCell(1)
    else:
        raise ValueError("Unsupported cell type.")

    x = self.inputs_placeholder
    # dtype is required because no initial_state is supplied.
    _, final_state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

    # An LSTM cell reports its state as an LSTMStateTuple (c, h); only the
    # hidden part h has the (batch_size, 1) shape promised above.
    if isinstance(final_state, tf.nn.rnn_cell.LSTMStateTuple):
        final_state = final_state.h

    # The prediction is the sigmoid of the final state. The earlier code
    # applied tanh to `preds` before it was ever assigned (NameError).
    preds = tf.sigmoid(final_state)
    return preds
def add_prediction_op(self):
    """Adds the unrolled RNN.

    For every time step the cell consumes the embedded input and the previous
    state, dropout is applied to the cell output, and the result is projected
    to class scores with ``U`` and ``b``.

    Returns:
        preds: tf.Tensor of shape (batch_size, max_length, n_classes)

    Raises:
        ValueError: if ``self.config.cell`` is neither "rnn" nor "gru".
    """
    x = self.add_embedding()
    dropout_rate = self.dropout_placeholder
    preds = []

    # Use the cell defined below. For Q2, we will just be using the
    # RNNCell you defined, but for Q3, we will run this code again
    # with a GRU cell!
    if self.config.cell == "rnn":
        cell = RNNCell(Config.n_features * Config.embed_size,
                       Config.hidden_size)
    elif self.config.cell == "gru":
        cell = GRUCell(Config.n_features * Config.embed_size,
                       Config.hidden_size)
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)

    # Define U and b2 as variables.
    with tf.variable_scope('prediction_variable'):
        U = tf.get_variable(
            'U',
            shape=[Config.hidden_size, Config.n_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b_2 = tf.get_variable('b', shape=[Config.n_classes])
        # NOTE(review): the start state is a variable of shape
        # [1, hidden_size] initialised to zeros (not a constant), and it
        # relies on broadcasting against the batch inside the cell -- confirm
        # this is intended.
        h_t = tf.get_variable(
            'h_t',
            dtype=tf.float32,
            initializer=tf.zeros(shape=[1, Config.hidden_size]))

    with tf.variable_scope("RNN"):
        for time_step in range(self.max_length):
            if time_step > 0:
                # From the second step onwards the cell must reuse the
                # variables it created on the first call.
                # reuse_variables() returns None, so there is nothing useful
                # to forward as a scope argument.
                tf.get_variable_scope().reuse_variables()
            o_t, h_t = cell(x[:, time_step], h_t)
            o_drop_t = tf.nn.dropout(o_t, dropout_rate)
            y_t = tf.matmul(o_drop_t, U) + b_2
            preds.append(y_t)

    # The list is ordered by time step; stacking along axis 1 yields
    # (batch_size, max_length, n_classes) directly.
    preds = tf.stack(preds, axis=1)

    assert preds.get_shape().as_list() == [
        None, self.max_length, self.config.n_classes
    ], "predictions are not of the right shape. Expected {}, got {}".format(
        [None, self.max_length, self.config.n_classes],
        preds.get_shape().as_list())
    return preds
def compute_cell_dynamics(args):
    """Evaluate one step of the GRU and RNN cells over a sweep of states and plot it.

    Builds a fresh graph with scalar-sized cells whose GRU parameters are
    drawn from a seeded NumPy generator, then evaluates the next hidden state
    for 1000 previous states spaced evenly over [-3, 3], once with the input
    fixed at 0 and once with the input fixed at 1. Each pair of results is
    handed to ``make_dynamics_plot``.

    Args:
        args: forwarded unchanged to ``make_dynamics_plot``.
    """
    # NOTE(review): the block nesting below was reconstructed from a
    # whitespace-collapsed source -- confirm the variable-scope layout.
    with tf.Graph().as_default():
        # You can change this around, but make sure to reset it to 41 when
        # submitting.
        np.random.seed(41)
        tf.set_random_seed(41)

        with tf.variable_scope("dynamics"):
            # One column per example: input x and previous hidden state h.
            x_placeholder = tf.placeholder(tf.float32, shape=(None,1))
            h_placeholder = tf.placeholder(tf.float32, shape=(None,1))

            # Coerce a value to a float32 array with at least two dimensions.
            def mat(x):
                return np.atleast_2d(np.array(x, dtype=np.float32))
            # Coerce a value to a float32 array with at least one dimension.
            def vec(x):
                return np.atleast_1d(np.array(x, dtype=np.float32))

            with tf.variable_scope("cell"):
                # Six weight draws (scaled by 3) and three bias draws.
                Ur, Wr, Uz, Wz, Uo, Wo = [mat(3*x) for x in np.random.randn(6)]
                br, bz, bo = [vec(x) for x in np.random.randn(3)]
                params = [Ur, Wr, br, Uz, Wz, bz, Uo, Wo, bo]

                # Create the variables up front with the sampled values.
                tf.get_variable("W_r", initializer=Wr)
                tf.get_variable("U_r", initializer=Ur)
                tf.get_variable("b_r", initializer=br)

                tf.get_variable("W_z", initializer=Wz)
                tf.get_variable("U_z", initializer=Uz)
                tf.get_variable("b_z", initializer=bz)

                tf.get_variable("W_o", initializer=Wo)
                tf.get_variable("U_o", initializer=Uo)
                tf.get_variable("b_o", initializer=bo)

            # Switch the enclosing scope to reuse mode before building the
            # cells; presumably so that the cells pick up the variables
            # created above instead of making new ones -- depends on how
            # GRUCell / RNNCell name their variables, confirm.
            tf.get_variable_scope().reuse_variables()
            y_gru, h_gru = GRUCell(1,1)(x_placeholder, h_placeholder, scope="cell")
            y_rnn, h_rnn = RNNCell(1,1)(x_placeholder, h_placeholder, scope="cell")

            init = tf.global_variables_initializer()
            with tf.Session() as session:
                session.run(init)

                # Case 1: input fixed at 0; h swept over [-3, 3] as a column.
                x = mat(np.zeros(1000)).T
                h = mat(np.linspace(-3, 3, 1000)).T
                ht_gru = session.run([h_gru], feed_dict={x_placeholder: x, h_placeholder: h})
                ht_rnn = session.run([h_rnn], feed_dict={x_placeholder: x, h_placeholder: h})
                # session.run was given a one-element list; unwrap it.
                ht_gru = np.array(ht_gru)[0]
                ht_rnn = np.array(ht_rnn)[0]
                make_dynamics_plot(args, 0, h, ht_rnn, ht_gru, params)

                # Case 2: same sweep with the input fixed at 1.
                x = mat(np.ones(1000)).T
                h = mat(np.linspace(-3, 3, 1000)).T
                ht_gru = session.run([h_gru], feed_dict={x_placeholder: x, h_placeholder: h})
                ht_rnn = session.run([h_rnn], feed_dict={x_placeholder: x, h_placeholder: h})
                ht_gru = np.array(ht_gru)[0]
                ht_rnn = np.array(ht_rnn)[0]
                make_dynamics_plot(args, 1, h, ht_rnn, ht_gru, params)
def add_prediction_op(self):
    """Run a GRU over the inputs and return the sigmoid of its final state.

    Returns:
        preds: sigmoid of the state returned by ``tf.nn.dynamic_rnn`` after
            the last time step of ``self.inputs_placeholder``.
    """
    # NOTE(review): presumably (input_size, state_size), matching how the
    # cells are constructed elsewhere -- confirm against GRUCell.
    cell = GRUCell(50, self.config.hidden_size)
    x = self.inputs_placeholder

    # The recurrent state carries hidden_size units per example, so the zero
    # state must be (batch_size, hidden_size). Sizing it from a slice of x
    # only works when the input width happens to equal the hidden size.
    batch_size = tf.shape(x)[0]
    init = tf.zeros([batch_size, self.config.hidden_size], dtype=tf.float32)

    _, last_state = tf.nn.dynamic_rnn(
        cell=cell, inputs=x, initial_state=init, dtype=tf.float32)
    preds = tf.sigmoid(last_state)
    return preds
def __init__(self, config):
    """Store the configuration and attach the recurrent cell it names.

    Args:
        config: object whose ``cell`` attribute is "rnn", "gru" or "lstm".

    Raises:
        ValueError: if ``config.cell`` is none of the supported names.
    """
    super(SequencePredictor, self).__init__()
    self.config = config
    self.grad_norm = None

    # Pick out the cell to use here.
    kind = self.config.cell
    if kind not in ("rnn", "gru", "lstm"):
        raise ValueError("Unsupported cell type.")
    if kind == "rnn":
        self.cell = RNNCell(1, 1)
    elif kind == "gru":
        self.cell = GRUCell(1, 1)
    # "lstm" is accepted, but no cell is attached to the instance here.
def __init__(self, helper, config, pretrained_embeddings, report=None):
    """Set up the embedding table, the recurrent cell and the output layer.

    Args:
        helper: object exposing ``max_length``; also forwarded to the parent.
        config: configuration forwarded to the parent constructor.
        pretrained_embeddings: NumPy array used as the initial embedding
            weights; its first two dimensions size the embedding table.
        report: optional value forwarded to the parent constructor.

    Raises:
        ValueError: if ``self.config.cell`` is neither "rnn" nor "gru".
    """
    super(RNNModel, self).__init__(helper, config, report)

    # Clamp the sequence length and write it back to the shared Config,
    # just in case people make a mistake.
    self.max_length = min(Config.max_length, helper.max_length)
    Config.max_length = self.max_length

    # Embedding table whose weights are replaced by the pretrained vectors
    # (cast to float32) and remain a trainable Parameter.
    table = nn.Embedding(pretrained_embeddings.shape[0],
                         pretrained_embeddings.shape[1]).float()
    self.pretrained_embeddings = table
    self.pretrained_embeddings.weight = nn.Parameter(
        torch.from_numpy(pretrained_embeddings).float())

    cell_kind = self.config.cell
    if cell_kind == "rnn":
        cell_cls = RNNCell
    elif cell_kind == "gru":
        cell_cls = GRUCell
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)
    self.cell = cell_cls(Config.n_features * Config.embed_size,
                         Config.hidden_size)

    # Output projection from hidden state to class scores.
    self.Ub = nn.Linear(Config.hidden_size, Config.n_classes).float()
def add_prediction_op(self):
    """Unroll the configured cell by hand and squash the final state.

    The cell is chosen from ``self.config.cell``, applied step by step to
    ``self.inputs_placeholder`` starting from an all-zero state, and the
    state after the last step is passed through a sigmoid.

    Returns:
        preds: tf.Tensor of shape (batch_size, 1)

    Raises:
        ValueError: if ``self.config.cell`` is not a supported cell type.
    """
    # Pick out the cell to use here.
    if self.config.cell == "rnn":
        cell = RNNCell(1, 1)
    elif self.config.cell == "gru":
        cell = GRUCell(1, 1)
    elif self.config.cell == "lstm":
        # NOTE(review): tf.zeros below needs an integer state_size; an LSTM
        # cell with tuple state will not work with this manual unroll --
        # confirm whether "lstm" is meant to be supported here.
        cell = tf.nn.rnn_cell.LSTMCell(1)
    else:
        raise ValueError("Unsupported cell type.")

    x = self.inputs_placeholder

    # Take the number of time steps from the placeholder's static shape
    # instead of assuming 20; fall back to 20 only when it is unknown.
    num_steps = x.get_shape().as_list()[1]
    if num_steps is None:
        num_steps = 20
    # Size the zero state from the batch actually fed in, not a fixed 100.
    batch_size = tf.shape(x)[0]

    inputs = [x[:, t, :] for t in range(num_steps)]
    with tf.variable_scope("rnn"):
        h = tf.zeros([batch_size, cell.state_size], dtype=tf.float32)
        for time_step in range(num_steps):
            if time_step > 0:
                # Share the cell's variables across time steps.
                tf.get_variable_scope().reuse_variables()
            _, h = cell(inputs[time_step], h)

    preds = tf.nn.sigmoid(h)
    return preds
def add_prediction_op(self):
    """Run the configured recurrent cell over the inputs and squash the final state.

    The cell is chosen from ``self.config.cell`` ("rnn", "gru" or "lstm"),
    unrolled over ``self.inputs_placeholder`` with ``tf.nn.dynamic_rnn``
    inside the "dynamic_rnn" variable scope, and the final hidden state is
    passed through a sigmoid so that every prediction lies between 0 and 1.

    Returns:
        preds: tf.Tensor of shape (batch_size, 1)

    Raises:
        ValueError: if ``self.config.cell`` is not a supported cell type.
    """
    # Pick out the cell to use here.
    if self.config.cell == "rnn":
        cell = RNNCell(1, 1)
    elif self.config.cell == "gru":
        cell = GRUCell(1, 1)
    elif self.config.cell == "lstm":
        cell = tf.nn.rnn_cell.LSTMCell(1)
    else:
        raise ValueError("Unsupported cell type.")

    x = self.inputs_placeholder

    # AUTO_REUSE lets this op be built more than once without a
    # "variable already exists" error.
    with tf.variable_scope("dynamic_rnn", reuse=tf.AUTO_REUSE):
        # Every other dynamic_rnn argument is left at its default; dtype is
        # required because no initial_state is supplied.
        _, state = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)

    # An LSTM cell reports its state as an LSTMStateTuple (c, h). Handing the
    # tuple straight to the sigmoid would pack both parts into one tensor
    # with an extra leading axis, so keep only the hidden part.
    if isinstance(state, tf.nn.rnn_cell.LSTMStateTuple):
        state = state.h

    preds = tf.nn.sigmoid(state)
    return preds
def add_prediction_op(self):
    """Run the configured recurrent cell and squash its last output.

    The cell named by ``self.config.cell`` is unrolled over
    ``self.inputs_placeholder`` with ``tf.nn.dynamic_rnn``; the output of the
    last time step is passed through a sigmoid.

    Returns:
        preds: tf.Tensor of shape (batch_size, 1)

    Raises:
        ValueError: if ``self.config.cell`` is not a supported cell type.
    """
    # Pick out the cell to use here.
    kind = self.config.cell
    if kind == "rnn":
        cell = RNNCell(1, 1)
    elif kind == "gru":
        cell = GRUCell(1, 1)
    elif kind == "lstm":
        cell = tf.nn.rnn_cell.LSTMCell(1)
    else:
        raise ValueError("Unsupported cell type.")

    x = self.inputs_placeholder

    # Only the per-step outputs are needed; the returned state is ignored.
    step_outputs, _ = tf.nn.dynamic_rnn(cell, x, dtype=tf.float32)
    # Axis 1 is time, so index -1 selects the last step for every example.
    last_output = step_outputs[:, -1, :]
    return tf.nn.sigmoid(last_output)
def add_prediction_op(self):
    """Run the configured recurrent cell from a zero state and squash its last output.

    The cell named by ``self.config.cell`` is unrolled over
    ``self.inputs_placeholder`` with ``tf.nn.dynamic_rnn``, starting from the
    cell's own zero state; the output of the last time step is passed through
    a sigmoid.

    Returns:
        preds: tf.Tensor of shape (batch_size, 1)

    Raises:
        ValueError: if ``self.config.cell`` is not a supported cell type.
    """
    # Pick out the cell to use here.
    kind = self.config.cell
    if kind == "rnn":
        cell = RNNCell(1, 1)
    elif kind == "gru":
        cell = GRUCell(1, 1)
    elif kind == "lstm":
        cell = tf.nn.rnn_cell.LSTMCell(1)
    else:
        raise ValueError("Unsupported cell type.")

    x = self.inputs_placeholder

    # The batch size is read from the input at run time.
    start_state = cell.zero_state(batch_size=tf.shape(x)[0],
                                  dtype=tf.float32)
    step_outputs, _ = tf.nn.dynamic_rnn(
        cell, x, initial_state=start_state, dtype=tf.float32)

    # First axis is batch size, second is sequence length: take the last step.
    last_output = step_outputs[:, -1]
    return tf.nn.sigmoid(last_output)
def add_placeholders(self):
    """Create the recurrent cell, the embedding layer and the output parameters.

    Despite the name, no placeholders are created here. The method sets the
    following attributes on ``self``:

        cell:     RNNCell or GRUCell, chosen by ``self.config.cell``
        embedded: trainable nn.Embedding built from
                  ``self.pretrained_embeddings``
        U:        float32 tensor of shape (hidden_size, n_classes) with
                  Xavier-uniform values, requires_grad=True
        b2:       float32 zero tensor of length n_classes, requires_grad=True

    Raises:
        ValueError: if ``self.config.cell`` is neither "rnn" nor "gru".
    """
    cell_kind = self.config.cell
    if cell_kind == "rnn":
        cell_cls = RNNCell
    elif cell_kind == "gru":
        cell_cls = GRUCell
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)
    self.cell = cell_cls(Config.n_features * Config.embed_size,
                         Config.hidden_size)

    # freeze=False keeps the pretrained vectors trainable.
    pretrained = t.from_numpy(self.pretrained_embeddings)
    self.embedded = nn.Embedding.from_pretrained(pretrained, freeze=False)

    # Allocate the projection matrix, then fill it in place with Xavier
    # values; xavier_uniform_ returns the tensor it was given.
    projection = t.zeros(
        (self.config.hidden_size, self.config.n_classes),
        requires_grad=True,
        dtype=t.float32)
    self.U = nn.init.xavier_uniform_(projection)

    self.b2 = t.zeros(
        (self.config.n_classes), dtype=t.float32, requires_grad=True)
def add_prediction_op(self):
    """Adds the unrolled RNN:

        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    The value of ``self.dropout_placeholder`` is handed to ``tf.nn.dropout``
    as the keep probability. The cell's variables are created on the first
    time step and reused on every later one.

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)

    Raises:
        ValueError: if ``self.config.cell`` is neither "rnn" nor "gru".
    """
    x = self.add_embedding()  # (batch_size, max_length, n_features*embed_size)
    keep_prob = self.dropout_placeholder
    step_scores = []  # One (batch_size, n_classes) tensor per time step.

    # For Q2 this is the RNNCell; for Q3 the same code runs with a GRU cell.
    cell_kind = self.config.cell
    if cell_kind == "rnn":
        cell_cls = RNNCell
    elif cell_kind == "gru":
        cell_cls = GRUCell
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)
    cell = cell_cls(Config.n_features * Config.embed_size, Config.hidden_size)

    # Output projection: Xavier-initialised matrix and zero bias.
    with tf.variable_scope('layer1'):
        U = tf.get_variable(
            'U', [Config.hidden_size, Config.n_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b2 = tf.get_variable(
            'b2', [Config.n_classes],
            initializer=tf.constant_initializer(0))

    # Zero start state, sized from the run-time batch dimension.
    batch_size = tf.shape(x)[0]
    state = tf.zeros([batch_size, Config.hidden_size])  # (batch_size, H)

    with tf.variable_scope("RNN"):
        for t in range(self.max_length):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            # The new state (second return value) is what feeds both the
            # next step and the dropout below.
            _, state = cell(x[:, t, :], state, scope="RNN")
            dropped = tf.nn.dropout(state, keep_prob)
            scores = tf.matmul(dropped, U) + b2  # (batch_size, n_classes)
            step_scores.append(scores)

    # Stack along axis 1 to get (batch_size, max_length, n_classes).
    preds = tf.stack(step_scores, axis=1)

    expected_shape = [None, self.max_length, self.config.n_classes]
    assert preds.get_shape().as_list() == expected_shape, \
        "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes],
            preds.get_shape().as_list())
    return preds
def add_prediction_op(self):
    """Adds the unrolled RNN:

        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    The value of ``self.dropout_placeholder`` is handed to ``tf.nn.dropout``
    as the keep probability. The cell's variables are created on the first
    time step and reused on every later one.

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)

    Raises:
        ValueError: if ``self.config.cell`` is neither "rnn" nor "gru".
    """
    x = self.add_embedding()
    keep_prob = self.dropout_placeholder
    step_scores = []  # Predicted output at each timestep goes here.

    # For Q2 this is the RNNCell; for Q3 the same code runs with a GRU cell.
    cell_kind = self.config.cell
    if cell_kind == "rnn":
        cell_cls = RNNCell
    elif cell_kind == "gru":
        cell_cls = GRUCell
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)
    cell = cell_cls(Config.n_features * Config.embed_size, Config.hidden_size)

    # Output projection: Xavier-initialised matrix and zero bias.
    with tf.variable_scope('output'):
        U = tf.get_variable(
            'U', [self.config.hidden_size, self.config.n_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b2 = tf.get_variable(
            'b2', [self.config.n_classes],
            initializer=tf.constant_initializer(0))

    # Initialise h_0. Its last dimension is hidden_size; its first dimension
    # is the batch size, which is not hard-coded but read from the first
    # dimension of x at run time.
    batch_size = tf.shape(x)[0]
    state = tf.zeros((batch_size, self.config.hidden_size))

    with tf.variable_scope("RNN"):
        # The cell creates its variables the first time it is called, so
        # every later call must reuse them instead of defining new ones.
        # The start state is a constant, i.e. it is not trained.
        for t in range(self.max_length):
            if t > 0:
                tf.get_variable_scope().reuse_variables()
            # o_t, h_t = cell(x_t, h_{t-1})
            # x[:, t, :] takes, for the whole batch, all features of the
            # t-th word.
            cell_out, state = cell(x[:, t, :], state, 'rnn-hidden')
            # o_drop_t = Dropout(o_t, dropout_rate)
            dropped = tf.nn.dropout(cell_out, keep_prob=keep_prob)
            # y_t = o_drop_t U + b_2
            scores = tf.matmul(dropped, U) + b2
            step_scores.append(scores)

    # step_scores is a list of max_length tensors, each of shape
    # [batch_size, n_classes]; stacking along axis 1 gives
    # [batch_size, max_length, n_classes]. (tf.stack replaces tf.pack.)
    preds = tf.stack(step_scores, axis=1)

    expected_shape = [None, self.max_length, self.config.n_classes]
    assert preds.get_shape().as_list() == expected_shape, \
        "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes],
            preds.get_shape().as_list())
    return preds
def add_prediction_op(self):
    """Build the prediction op for the cell type named in the configuration.

    The forward and reversed embeddings are transposed with perm [1, 0, 2]
    and handed, together with the chosen cell(s), to the matching helper
    (``_rnn_gru_pred_op``, ``_bilstm_pred_op`` or ``_tf_lstm_pred_op``).

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)

    Raises:
        ValueError: if ``self.config.cell`` is not one of the handled names.
    """
    x, x_reverse = self.add_embedding()
    dropout_rate = self.dropout_placeholder
    preds = []  # Replaced by the helper's result in the branches below.

    # Swap the first two axes of both embedding tensors.
    x_ = tf.transpose(x, perm=[1, 0, 2])
    x_reverse_ = tf.transpose(x_reverse, perm=[1, 0, 2])

    kind = self.config.cell
    if kind in ("rnn", "gru"):
        cell_cls = RNNCell if kind == "rnn" else GRUCell
        cell = cell_cls(Config.n_features * Config.embed_size,
                        Config.hidden_size)
        preds = self._rnn_gru_pred_op(x_, cell)
    elif kind == "lstm":
        # NOTE(review): the cell is built but no prediction op is created,
        # so preds is still the empty list and the shape check below will
        # fail on it -- confirm which helper this branch should call.
        cell = LSTMCell(Config.n_features * Config.embed_size,
                        Config.hidden_size)
    elif kind == "bilstm":
        cell_forward = tf.contrib.rnn.BasicLSTMCell(Config.hidden_size)
        cell_backwards = tf.contrib.rnn.BasicLSTMCell(Config.hidden_size)
        preds = self._bilstm_pred_op(x_, x_reverse_, cell_forward,
                                     cell_backwards)
    elif kind in ("tf-basiclstm", "tf-lstm"):
        if kind == "tf-basiclstm":
            cell = tf.contrib.rnn.BasicLSTMCell(Config.hidden_size)
        else:
            cell = tf.contrib.rnn.LSTMCell(Config.hidden_size)
        preds = self._tf_lstm_pred_op(x_, cell)
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)

    expected_shape = [None, self.max_length, Config.n_classes]
    assert preds.get_shape().as_list() == expected_shape, \
        "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes],
            preds.get_shape().as_list())
    return preds
def add_prediction_op(self):
    """Adds the unrolled RNN:

        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    The value of ``self.dropout_placeholder`` is handed to ``tf.nn.dropout``
    as the keep probability. The cell's variables are created on the first
    time step and reused on every later one.

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)

    Raises:
        ValueError: if ``self.config.cell`` is neither "rnn" nor "gru".
    """
    x = self.add_embedding()
    keep_prob = self.dropout_placeholder
    step_scores = []  # Predicted output at each timestep goes here.

    # For Q2 this is the RNNCell; for Q3 the same code runs with a GRU cell.
    cell_kind = self.config.cell
    if cell_kind == "rnn":
        cell_cls = RNNCell
    elif cell_kind == "gru":
        cell_cls = GRUCell
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)
    cell = cell_cls(Config.n_features * Config.embed_size, Config.hidden_size)

    # Output projection: Xavier-initialised matrix and zero bias.
    U = tf.get_variable(
        "U",
        shape=(self.config.hidden_size, self.config.n_classes),
        initializer=tf.contrib.layers.xavier_initializer())
    b2 = tf.get_variable(
        "b2",
        shape=(self.config.n_classes, ),
        initializer=tf.constant_initializer(0.0))

    # Zero start state, sized from the run-time batch dimension.
    batch_size = tf.shape(x)[0]
    state = tf.zeros(shape=(batch_size, self.config.hidden_size))

    with tf.variable_scope("RNN"):
        for t in range(self.max_length):
            if t != 0:
                tf.get_variable_scope().reuse_variables()
            # The first value returned by the cell is carried forward and
            # also fed to dropout; the second one is discarded.
            state, _ = cell(x[:, t, :], state)
            dropped = tf.nn.dropout(state, keep_prob)
            # b2 is given a leading axis of size 1 before the addition.
            scores = tf.matmul(dropped, U) + tf.expand_dims(b2, 0)
            step_scores.append(scores)

    # Stack along axis 1 to get (batch_size, max_length, n_classes).
    preds = tf.stack(step_scores, axis=1)

    expected_shape = [None, self.max_length, self.config.n_classes]
    assert preds.get_shape().as_list() == expected_shape, \
        "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes],
            preds.get_shape().as_list())
    return preds
def add_prediction_op(self):
    """Adds the unrolled RNN:

        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    The value of ``self.dropout_placeholder`` is handed to ``tf.nn.dropout``
    as the keep probability. Variable reuse is switched on right after each
    call to the cell, so only the first call creates variables.

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)

    Raises:
        ValueError: if ``self.config.cell`` is neither "rnn" nor "gru".
    """
    x = self.add_embedding()
    keep_prob = self.dropout_placeholder
    step_scores = []  # Predicted output at each timestep goes here.

    # For Q2 this is the RNNCell; for Q3 the same code runs with a GRU cell.
    cell_kind = self.config.cell
    if cell_kind == "rnn":
        cell_cls = RNNCell
    elif cell_kind == "gru":
        cell_cls = GRUCell
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)
    cell = cell_cls(Config.n_features * Config.embed_size, Config.hidden_size)

    # Output projection: Xavier-initialised matrix and zero bias row.
    U = tf.get_variable(
        "Weights_Layer2",
        shape=(self.config.hidden_size, self.config.n_classes),
        initializer=tf.contrib.layers.xavier_initializer())
    b_2 = tf.get_variable(
        "bias_Layer2",
        shape=(1, self.config.n_classes),
        initializer=tf.zeros_initializer)

    # The zero start state has a single row (shape (1, hidden_size)).
    state = tf.zeros(shape=(1, self.config.hidden_size), dtype=tf.float32)

    with tf.variable_scope("RNN"):
        for t in range(self.max_length):
            cell_out, state = cell(x[:, t, :], state)
            # After the cell has been called once, mark the scope as reusing
            # so that later calls share the same variables.
            tf.get_variable_scope().reuse_variables()
            dropped = tf.nn.dropout(cell_out, keep_prob)
            scores = tf.matmul(dropped, U) + b_2
            step_scores.append(scores)

    # Stacking gives (max_length, None, n_classes); swapping the first two
    # axes gives the required (None, max_length, n_classes).
    time_major = tf.stack(step_scores)
    preds = tf.transpose(time_major, perm=[1, 0, 2])

    expected_shape = [None, self.max_length, self.config.n_classes]
    assert preds.get_shape().as_list() == expected_shape, \
        "predictions are not of the right shape. Expected {}, got {}".format(
            [None, self.max_length, self.config.n_classes],
            preds.get_shape().as_list())
    return preds
def add_prediction_op(self):
    """Build the unrolled RNN and return the per-timestep class scores.

    The recurrence that is unrolled over ``self.max_length`` steps is:

        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, keep_prob)
            y_t = o_drop_t U + b_2

    ``tf.nn.dropout`` takes the keep probability (1 - p_drop); it is set to
    the value of ``self.dropout_placeholder``.

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)

    Raises:
        ValueError: if ``self.config.cell`` is neither "rnn" nor "gru".
    """
    embedded = self.add_embedding()
    keep_prob = self.dropout_placeholder

    # Q2 runs this with the plain RNNCell, Q3 re-runs it with the GRU cell.
    cell_kind = self.config.cell
    if cell_kind not in ("rnn", "gru"):
        raise ValueError("Unsuppported cell type: " + self.config.cell)
    cell_cls = RNNCell if cell_kind == "rnn" else GRUCell
    cell = cell_cls(Config.n_features * Config.embed_size, Config.hidden_size)

    # Output projection: Xavier-initialised matrix, zero-initialised bias.
    with tf.variable_scope('Rnn_Out'):
        U = tf.get_variable(
            name="U",
            shape=[self.config.hidden_size, self.config.n_classes],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer())
        zero_bias = tf.constant(0.0, shape=[self.config.n_classes])
        b2 = tf.get_variable(name='b2', dtype=tf.float32,
                             initializer=zero_bias)

    # The initial hidden state carries a batch dimension, read at run time
    # from the input placeholder via tf.shape.
    with tf.variable_scope('state'):
        batch_size = tf.shape(self.input_placeholder)[0]
        state = tf.zeros(name='hidden',
                         shape=[batch_size, self.config.hidden_size])

    step_scores = []  # one score tensor per timestep
    with tf.variable_scope("RNN"):
        for step in range(self.max_length):
            if step > 0:
                # cell() creates variables through tf.get_variable, so from
                # the second step onwards the scope must be set to reuse.
                tf.get_variable_scope().reuse_variables()
            output, state = cell(embedded[:, step, :], state)
            dropped = tf.nn.dropout(output, keep_prob=keep_prob)
            step_scores.append(tf.matmul(dropped, U) + b2)

    # Stacking puts the time axis first; swap it with the batch axis.
    preds = tf.transpose(tf.stack(step_scores), perm=[1, 0, 2])

    expected_shape = [None, self.max_length, self.config.n_classes]
    assert preds.get_shape().as_list() == expected_shape, (
        "predictions are not of the right shape. Expected {}, got {}".format(
            expected_shape, preds.get_shape().as_list()))
    return preds
def add_prediction_op(self):
    """Build the unrolled RNN and return the per-timestep class scores.

    The recurrence that is unrolled over ``self.max_length`` steps is:

        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, keep_prob)
            y_t = o_drop_t U + b_2

    The cell type is taken from ``self.config.cell`` ("rnn" or "gru").
    ``tf.nn.dropout`` takes the keep probability (1 - p_drop); it is set to
    the value of ``self.dropout_placeholder``.

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)

    Raises:
        ValueError: if ``self.config.cell`` is neither "rnn" nor "gru".
    """
    # x is (None, max_length, n_features * embed_size)
    x = self.add_embedding()
    preds = []  # Predicted output at each timestep goes here.

    # Pick the cell class from the configuration. The cell built here is
    # the one used below: it must not be replaced afterwards, otherwise the
    # "gru" setting is silently ignored and a plain RNN is always trained.
    if self.config.cell == "rnn":
        cell_cls = RNNCell
    elif self.config.cell == "gru":
        cell_cls = GRUCell
    else:
        raise ValueError("Unsuppported cell type: " + self.config.cell)
    # A cell handles one timestep, so its input size is the per-step
    # feature width, not the sequence length.
    cell = cell_cls(self.config.n_features * self.config.embed_size,
                    self.config.hidden_size)

    # Output projection: Xavier-initialised matrix, zero-initialised bias.
    with tf.variable_scope('Layer1'):
        xavier_initializer = tf.contrib.layers.xavier_initializer()
        U = tf.get_variable('U',
                            shape=(Config.hidden_size, Config.n_classes),
                            initializer=xavier_initializer)
        b2 = tf.get_variable('b2',
                             shape=(Config.n_classes, ),
                             initializer=tf.constant_initializer(0))

    # Single-row zero state; presumably broadcast against the batch
    # dimension inside the cell -- TODO confirm against the cell code.
    h_t = tf.zeros((1, Config.hidden_size))

    with tf.variable_scope("RNN"):
        for time_step in range(self.max_length):
            if time_step > 0:
                # The first call creates the cell's variables; every later
                # step must reuse them instead of creating new ones.
                tf.get_variable_scope().reuse_variables()
            x_t = x[:, time_step, :]
            # NOTE(review): scope="RNN" inside the enclosing "RNN" scope
            # presumably nests the cell variables one level deeper; kept
            # as-is so existing variable names do not change.
            o_t, h_t = cell(x_t, h_t, scope="RNN")
            o_drop_t = tf.nn.dropout(o_t, self.dropout_placeholder)
            y_t = tf.matmul(o_drop_t, U) + b2
            preds.append(y_t)

    # Stack along axis 1 so the result is (batch, max_length, n_classes).
    preds = tf.stack(preds, axis=1)

    assert preds.get_shape().as_list() == [
        None, self.max_length, self.config.n_classes
    ], "predictions are not of the right shape. Expected {}, got {}".format(
        [None, self.max_length, self.config.n_classes],
        preds.get_shape().as_list())
    return preds