def __init__(self, X, y, filename='lstm_cell', inspect_rate=50, iterations=1000,
             learning_rate=0.000025, input_nodes=3, hidden_nodes=3, output_nodes=1):
    self.X = X
    self.y = y
    self.filename = filename
    self.inspect_rate = inspect_rate
    self.iterations = iterations
    self.learning_rate = learning_rate
    self.input_nodes = input_nodes
    self.hidden_nodes = hidden_nodes
    self.output_nodes = output_nodes

    # initialize placeholder nodes
    self.activation_input = np.atleast_2d(np.ones(self.input_nodes))
    self.activation_hidden = np.apply_along_axis(
        lambda x: LSTMCell(5, 5, 5, 0.000025), 0,
        np.atleast_2d(np.ones(self.hidden_nodes)))
    self.activation_output = np.atleast_2d(np.ones(self.output_nodes))

    # initialize weights
    self.ih_weights = np.random.randn(self.input_nodes, self.hidden_nodes)
    self.ho_weights = np.random.randn(self.hidden_nodes, self.output_nodes)

    # initialize placeholder deltas
    self.ih_deltas = np.zeros_like(self.ih_weights)
    self.ho_deltas = np.zeros_like(self.ho_weights)
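# A minimal usage sketch for the constructor above. The class name `LSTMNetwork`
# and the shapes of X and y are assumptions made for illustration; only the
# constructor arguments shown in __init__ come from the original snippet.
import numpy as np

X = np.random.randn(100, 3)   # 100 samples, input_nodes=3 features each
y = np.random.randn(100, 1)   # one target value per sample

net = LSTMNetwork(X, y, iterations=500, learning_rate=2.5e-5,
                  input_nodes=3, hidden_nodes=3, output_nodes=1)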
def __init__(self, input_size, hidden_size, output_size):
    super(LSTMModel, self).__init__()
    self.hidden_size = hidden_size
    # Our own LSTM implementation
    self.lstm = LSTMCell(input_size, hidden_size)
    # Fully-connected output layer
    self.fc = torch.nn.Linear(hidden_size, output_size)
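# A hedged sketch of what a forward pass for LSTMModel could look like. The
# original snippet only shows __init__; the loop below assumes the custom
# LSTMCell takes (x_t, (h, c)) and returns an updated (h, c), which may differ
# from the actual implementation.
import torch

def forward(self, x):
    # x: (batch, seq_len, input_size)
    batch_size, seq_len, _ = x.shape
    h = torch.zeros(batch_size, self.hidden_size)
    c = torch.zeros(batch_size, self.hidden_size)
    for t in range(seq_len):
        h, c = self.lstm(x[:, t, :], (h, c))   # assumed cell interface
    return self.fc(h)                          # predict from the last hidden state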
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    TODO: There are quite a few things you'll need to do in this function:
        - Define the variables U, b_2.
        - Define the vector h as a constant and initialize it with zeros.
          See tf.zeros and tf.shape for information on how to initialize this
          variable to be of the right shape.
          https://www.tensorflow.org/api_docs/python/constant_op/constant_value_tensors#zeros
          https://www.tensorflow.org/api_docs/python/array_ops/shapes_and_shaping#shape
        - In a for loop, begin to unroll the RNN sequence. Collect the
          predictions in a list.
        - When unrolling the loop, from the second iteration onwards, you will
          HAVE to call tf.get_variable_scope().reuse_variables() so that you do
          not create new variables in the RNN cell.
          See https://www.tensorflow.org/versions/master/how_tos/variable_scope/
        - Concatenate and reshape the predictions into a predictions tensor.
    Hint: You will find the function tf.pack (similar to np.asarray) useful to
          assemble a list of tensors into a larger tensor.
          https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#pack
    Hint: You will find the function tf.transpose and the perms argument useful
          to shuffle the indices of the tensor.
          https://www.tensorflow.org/api_docs/python/array_ops/slicing_and_joining#transpose

    Remember:
        * Use the Xavier initialization for matrices.
        * Note that tf.nn.dropout takes the keep probability (1 - p_drop) as an
          argument. The keep probability should be set to the value of
          self.dropout_placeholder

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, n_classes)
    """
    x1, x2 = self.add_embedding()
    dropout_rate = self.dropout_placeholder

    # choose cell type
    if self.config.cell == "rnn":
        cell = RNNCell(self.config.embed_size, self.config.hidden_size)
    elif self.config.cell == "gru":
        cell = GRUCell(self.config.embed_size, self.config.hidden_size)
    elif self.config.cell == "lstm":
        cell = LSTMCell(self.config.embed_size, self.config.hidden_size)
    else:
        raise ValueError("Unsupported cell type: " + self.config.cell)

    # Initialize hidden states to zero vectors of shape (num_examples, hidden_size)
    h1 = tf.zeros((tf.shape(x1)[0], self.config.hidden_size), tf.float32)
    h2 = tf.zeros((tf.shape(x2)[0], self.config.hidden_size), tf.float32)

    with tf.variable_scope("RNN1") as scope:
        for time_step in range(self.helper.max_length):
            if time_step != 0:
                scope.reuse_variables()
            o1_t, h1 = cell(x1[:, time_step, :], h1, scope)

    with tf.variable_scope("RNN2") as scope:
        for time_step in range(self.helper.max_length):
            if time_step != 0:
                scope.reuse_variables()
            o2_t, h2 = cell(x2[:, time_step, :], h2, scope)

    # h_drop1 = tf.nn.dropout(h1, dropout_rate)
    # h_drop2 = tf.nn.dropout(h2, dropout_rate)

    # use L2-regularization: sum of squares of all parameters
    if self.config.distance_measure == "l2":
        # perform logistic regression on the L2 distance between h1 and h2
        distance = norm(h1 - h2 + 0.000001)
        logistic_a = tf.Variable(0.0, dtype=tf.float32, name="logistic_a")
        logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
        self.regularization_term = tf.square(logistic_a) + tf.square(logistic_b)
        preds = tf.sigmoid(logistic_a * distance + logistic_b)

    elif self.config.distance_measure == "cosine":
        # perform logistic regression on the cosine distance between h1 and h2
        distance = cosine_distance(h1 + 0.000001, h2 + 0.000001)
        logistic_a = tf.Variable(1.0, dtype=tf.float32, name="logistic_a")
        logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
        self.regularization_term = tf.square(logistic_a) + tf.square(logistic_b)
        preds = tf.sigmoid(logistic_a * distance + logistic_b)

    elif self.config.distance_measure == "custom_coef":
        # perform logistic regression on the vector |h1-h2|, equivalent to
        # logistic regression on the (scalar) weighted Manhattan distance
        # between h1 and h2, i.e. a weighted sum of |h1-h2|
        logistic_a = tf.get_variable("coef", [self.config.hidden_size], tf.float32,
                                     tf.contrib.layers.xavier_initializer())
        logistic_b = tf.Variable(0.0, dtype=tf.float32, name="logistic_b")
        self.regularization_term = tf.reduce_sum(tf.square(logistic_a)) + tf.square(logistic_b)
        preds = tf.sigmoid(tf.reduce_sum(logistic_a * tf.abs(h1 - h2), axis=1) + logistic_b)

    elif self.config.distance_measure == "concat":
        # use softmax for prediction
        U = tf.get_variable("U", (4 * self.config.hidden_size, self.config.n_classes),
                            tf.float32, tf.contrib.layers.xavier_initializer())
        b = tf.get_variable("b", (self.config.n_classes,), tf.float32,
                            tf.constant_initializer(0))
        v = tf.nn.relu(tf.concat([h1, h2, tf.square(h1 - h2), h1 * h2], 1))
        self.regularization_term = tf.reduce_sum(tf.square(U)) + tf.reduce_sum(tf.square(b))
        preds = tf.matmul(v, U) + b

    elif self.config.distance_measure == "concat_steroids":
        # use softmax for prediction
        W1 = tf.get_variable("W1", (4 * self.config.hidden_size, self.config.hidden_size),
                             tf.float32, tf.contrib.layers.xavier_initializer())
        b1 = tf.get_variable("b1", (self.config.hidden_size,), tf.float32,
                             tf.constant_initializer(0))
        W2 = tf.get_variable("W2", (self.config.hidden_size, self.config.n_classes),
                             tf.float32, tf.contrib.layers.xavier_initializer())
        b2 = tf.get_variable("b2", (self.config.n_classes,), tf.float32,
                             tf.constant_initializer(0))
        v1 = tf.nn.relu(tf.concat([h1, h2, tf.square(h1 - h2), h1 * h2], 1))
        v2 = tf.nn.relu(tf.matmul(v1, W1) + b1)
        self.regularization_term = (tf.reduce_sum(tf.square(W1)) + tf.reduce_sum(tf.square(b1)) +
                                    tf.reduce_sum(tf.square(W2)) + tf.reduce_sum(tf.square(b2)))
        preds = tf.matmul(v2, W2) + b2

    else:
        raise ValueError("Unsupported distance type: " + self.config.distance_measure)

    return preds
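# The function above calls `norm` and `cosine_distance` without defining them.
# The helpers below are plausible TF 1.x sketches of what they might compute
# (row-wise L2 norm and cosine distance); they are assumptions, not the
# original implementations.
import tensorflow as tf

def norm(x):
    # row-wise Euclidean norm: shape (batch,) from shape (batch, hidden)
    return tf.sqrt(tf.reduce_sum(tf.square(x), axis=1))

def cosine_distance(a, b):
    # 1 - cosine similarity, computed per row
    a_n = tf.nn.l2_normalize(a, dim=1)
    b_n = tf.nn.l2_normalize(b, dim=1)
    return 1.0 - tf.reduce_sum(a_n * b_n, axis=1)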
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, non_terminal_vocab)
    """
    x = self.add_embedding()
    dropout_rate = self.dropout_placeholder

    preds = []   # Predicted output at each timestep should go here!
    hidden = []
    cell = LSTMCell(Config.n_token_features * Config.embed_size, Config.hidden_size)

    # Define U and b2 as variables.
    # Initialize state as vector of zeros.
    xinit = tf.contrib.layers.xavier_initializer(dtype=tf.float64)

    if not self.config.terminal_pred:
        output_size = self.config.non_terminal_vocab
    else:
        output_size = self.config.terminal_vocab

    U = tf.get_variable('U', shape=[self.config.hidden_size, output_size],
                        initializer=xinit, dtype=tf.float64)
    b2 = tf.get_variable('b2', shape=[output_size],
                         initializer=tf.constant_initializer(0.0), dtype=tf.float64)

    c_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size], dtype=tf.float64)
    h_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size], dtype=tf.float64)
    state_tuple = (c_t, h_t)

    scope = "LSTM_terminal" if self.config.terminal_pred else "LSTM_non_terminal"

    # Commented-out variant of the attention ("lstmA") unroll:
    '''
    if self.config.cell == "lstmA":
        W_a = tf.get_variable('W_a', shape=[self.config.hidden_size, self.config.hidden_size],
                              dtype=tf.float64, initializer=xinit)
        W_o = tf.get_variable('W_o', shape=[2 * self.config.hidden_size, self.config.hidden_size],
                              dtype=tf.float64, initializer=xinit)
        W_s = tf.get_variable('W_s', shape=[self.config.hidden_size, self.config.hidden_size],
                              dtype=tf.float64, initializer=xinit)
        b_o = tf.get_variable('b_o', shape=[self.config.hidden_size], dtype=tf.float64,
                              initializer=tf.constant_initializer(0.0))
        b_s = tf.get_variable('b_s', shape=[self.config.hidden_size], dtype=tf.float64,
                              initializer=tf.constant_initializer(0.0))

    with tf.variable_scope(scope):
        for time_step in range(self.max_length):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            o_t, h_t = cell(x[:, time_step, :], state_tuple)
            ht = tf.reshape(tf.matmul(h_t, W_a),
                            (tf.shape(x)[0], -1, self.config.hidden_size))
            weights = tf.reduce_sum(ht * hidden, axis=2) * self.attn_mask_placeholder
            weights = tf.nn.softmax(weights)
            context = tf.reduce_sum(
                tf.reshape(weights, (tf.shape(weights)[0], tf.shape(weights)[1], -1)) * hidden,
                axis=1)
            o_drop_t = tf.nn.dropout(o_t, dropout_rate)
            preds.append(tf.matmul(o_drop_t, U) + b2)

    preds = tf.stack(preds, 1)
    final_preds = tf.boolean_mask(preds, self.mask_placeholder)
    '''

    with tf.variable_scope(scope):
        for time_step in range(self.max_length):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            o_t, h_t = cell(x[:, time_step, :], state_tuple)
            o_drop_t = tf.nn.dropout(o_t, dropout_rate)
            preds.append(tf.matmul(o_drop_t, U) + b2)

            if self.config.cell == "lstmA":
                W_a = tf.get_variable('W_a',
                                      shape=[self.config.hidden_size, self.config.hidden_size],
                                      dtype=tf.float64, initializer=xinit)
                W_o = tf.get_variable('W_o',
                                      shape=[2 * self.config.hidden_size, output_size],
                                      dtype=tf.float64, initializer=xinit)
                W_s = tf.get_variable('W_s', shape=[output_size, output_size],
                                      dtype=tf.float64, initializer=xinit)
                b_o = tf.get_variable('b_o', shape=[output_size], dtype=tf.float64,
                                      initializer=tf.constant_initializer(0.0))
                b_s = tf.get_variable('b_s', shape=[output_size], dtype=tf.float64,
                                      initializer=tf.constant_initializer(0.0))

                hidden.append(h_t[1])
                hidden_stack = tf.stack(hidden, 1)
                ht = tf.reshape(tf.matmul(h_t[1], W_a),
                                (tf.shape(x)[0], -1, self.config.hidden_size))
                weights = tf.reduce_sum(ht * hidden_stack, axis=2) * tf.slice(
                    self.attn_mask_placeholder, [0, 0], [-1, time_step + 1])
                weights = tf.nn.softmax(weights)
                context = tf.reduce_sum(
                    tf.reshape(weights,
                               (tf.shape(weights)[0], tf.shape(weights)[1], -1)) * hidden_stack,
                    axis=1)
                # replace the last hidden state with the attention context
                hidden = hidden[:-1] + [context]

    preds = tf.stack(preds, 1)
    hidden = tf.stack(hidden, 1)
    final_preds = tf.boolean_mask(preds, self.mask_placeholder)
    final_hidden = tf.boolean_mask(hidden, self.mask_placeholder)

    if self.config.cell == "lstmA":
        W_a = tf.get_variable('W_a',
                              shape=[self.config.hidden_size, self.config.hidden_size],
                              dtype=tf.float64, initializer=xinit)
        W_o = tf.get_variable('W_o', shape=[2 * self.config.hidden_size, output_size],
                              dtype=tf.float64, initializer=xinit)
        W_s = tf.get_variable('W_s', shape=[output_size, output_size],
                              dtype=tf.float64, initializer=xinit)
        b_o = tf.get_variable('b_o', shape=[output_size], dtype=tf.float64,
                              initializer=tf.constant_initializer(0.0))
        b_s = tf.get_variable('b_s', shape=[output_size], dtype=tf.float64,
                              initializer=tf.constant_initializer(0.0))

        ht = tf.reshape(tf.matmul(final_hidden, W_a),
                        (tf.shape(x)[0], -1, self.config.hidden_size))
        weights = tf.reduce_sum(ht * hidden, axis=2) * self.attn_mask_placeholder
        weights = tf.nn.softmax(weights)
        context = tf.reduce_sum(
            tf.reshape(weights, (tf.shape(weights)[0], tf.shape(weights)[1], -1)) * hidden,
            axis=1)
        final_preds = tf.tanh(tf.matmul(tf.concat([context, final_hidden], 1), W_o) + b_o)
        final_preds = tf.matmul(final_preds, W_s) + b_s

    if self.config.terminal_pred:
        nt = tf.nn.embedding_lookup(self.embeddings,
                                    self.next_non_terminal_input_placeholder)
        nt = tf.reshape(nt, [-1, self.config.n_token_features * self.config.embed_size])
        U_nt = tf.get_variable('U_nt', shape=[self.config.hidden_size, output_size],
                               initializer=xinit)
        b_t = tf.get_variable('b_t', shape=[output_size],
                              initializer=tf.constant_initializer(0.0))
        final_preds = final_preds + tf.matmul(nt, U_nt) + b_t

    return final_preds
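# A small NumPy sketch of the dot-product attention step used in the "lstmA"
# branch above: score each previous hidden state against the current one,
# softmax the scores, and take the weighted sum as the context vector. The
# shapes and the identity stand-in for W_a are illustrative assumptions, not
# trained weights.
import numpy as np

batch, steps, hidden_size = 2, 4, 3
H = np.random.randn(batch, steps, hidden_size)        # stacked previous hidden states
h_t = H[:, -1, :]                                     # current hidden state
W_a = np.eye(hidden_size)                             # stand-in for the learned W_a

scores = np.einsum('bh,bth->bt', h_t @ W_a, H)        # dot-product score per step
weights = np.exp(scores) / np.exp(scores).sum(axis=1, keepdims=True)   # softmax
context = np.einsum('bt,bth->bh', weights, H)         # attention-weighted sum
print(context.shape)   # (batch, hidden_size)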
def add_prediction_op(self):
    """Adds the unrolled RNN:
        h_0 = 0
        for t in 1 to T:
            o_t, h_t = cell(x_t, h_{t-1})
            o_drop_t = Dropout(o_t, dropout_rate)
            y_t = o_drop_t U + b_2

    Returns:
        pred: tf.Tensor of shape (batch_size, max_length, non_terminal_vocab)
    """
    x = self.add_embedding()
    dropout_rate = self.dropout_placeholder

    preds = []   # Predicted output at each timestep should go here!
    hidden = []
    cell = LSTMCell(Config.n_token_features * Config.embed_size, Config.hidden_size)

    # Define U and b2 as variables.
    # Initialize state as vector of zeros.
    xinit = tf.contrib.layers.xavier_initializer(dtype=tf.float64)

    if not self.config.terminal_pred:
        output_size = self.config.non_terminal_vocab
    else:
        output_size = self.config.terminal_vocab

    U = tf.get_variable('U', shape=[self.config.hidden_size, output_size],
                        initializer=xinit, dtype=tf.float64)
    b2 = tf.get_variable('b2', shape=[output_size],
                         initializer=tf.constant_initializer(0.0), dtype=tf.float64)

    c_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size], dtype=tf.float64)
    h_t = tf.zeros([tf.shape(x)[0], self.config.hidden_size], dtype=tf.float64)
    state_tuple = (c_t, h_t)

    scope = "LSTM_terminal" if self.config.terminal_pred else "LSTM_non_terminal"

    with tf.variable_scope(scope):
        for time_step in range(self.max_length):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            o_t, h_t = cell(x[:, time_step, :], state_tuple)
            o_drop_t = tf.nn.dropout(o_t, dropout_rate)
            preds.append(tf.matmul(o_drop_t, U) + b2)
            hidden.append(h_t[1])

            if not (self.config.cell == "lstmAend") and not (self.config.cell == "lstm"):
                W_a = tf.get_variable('W_a',
                                      shape=[self.config.hidden_size, self.config.hidden_size],
                                      dtype=tf.float64, initializer=xinit)
                W_o = tf.get_variable('W_o',
                                      shape=[2 * self.config.hidden_size, output_size],
                                      dtype=tf.float64, initializer=xinit)
                W_s = tf.get_variable('W_s', shape=[output_size, output_size],
                                      dtype=tf.float64, initializer=xinit)
                b_o = tf.get_variable('b_o', shape=[output_size], dtype=tf.float64,
                                      initializer=tf.constant_initializer(0.0))
                b_s = tf.get_variable('b_s', shape=[output_size], dtype=tf.float64,
                                      initializer=tf.constant_initializer(0.0))

                hidden_stack = tf.stack(hidden, 1)
                ht = tf.reshape(tf.matmul(h_t[1], W_a),
                                (tf.shape(x)[0], -1, self.config.hidden_size))
                weights = tf.reduce_sum(ht * hidden_stack, axis=2) * tf.slice(
                    self.attn_mask_placeholder, [0, 0], [-1, time_step + 1])
                weights = tf.nn.softmax(weights)
                context = tf.reduce_sum(
                    tf.reshape(weights,
                               (tf.shape(weights)[0], tf.shape(weights)[1], -1)) * hidden_stack,
                    axis=1)
                context_hidden_sum = tf.add(context, h_t[1])

                if self.config.cell == "lstmAcont":
                    # replace the last hidden state with the attention context
                    hidden = hidden[:-1] + [context]

                if self.config.cell == "lstmAsum_fn":
                    W_alpha = tf.get_variable('W_alpha',
                                              shape=[self.config.hidden_size, self.config.hidden_size],
                                              dtype=tf.float64, initializer=xinit)
                    W_beta = tf.get_variable('W_beta',
                                             shape=[self.config.hidden_size, self.config.hidden_size],
                                             dtype=tf.float64, initializer=xinit)
                    U_alpha = tf.get_variable('U_alpha',
                                              shape=[self.config.embed_size, self.config.hidden_size],
                                              dtype=tf.float64, initializer=xinit)
                    U_beta = tf.get_variable('U_beta',
                                             shape=[self.config.embed_size, self.config.hidden_size],
                                             dtype=tf.float64, initializer=xinit)
                    alpha = tf.sigmoid(tf.matmul(context, W_alpha) +
                                       tf.matmul(x[:, time_step, :], U_alpha))
                    beta = tf.sigmoid(tf.matmul(h_t[1], W_beta) +
                                      tf.matmul(x[:, time_step, :], U_beta))
                    straightSum = alpha * context + beta * h_t[1]
                    hidden = hidden[:-1] + [straightSum]

                if self.config.cell == "lstmAsum":
                    alpha = tf.get_variable('alpha', shape=(), dtype=tf.float64,
                                            initializer=tf.random_uniform_initializer(-1.0, 2.0))
                    beta = tf.get_variable('beta', shape=(), dtype=tf.float64,
                                           initializer=tf.random_uniform_initializer(-1.0, 2.0))
                    straightSum = tf.add(tf.scalar_mul(alpha, context),
                                         tf.scalar_mul(beta, h_t[1]))
                    hidden = hidden[:-1] + [straightSum]

                if self.config.cell == "lstmAwsum_fn":
                    W_ph = tf.get_variable('W_ph',
                                           shape=[self.config.hidden_size, self.config.hidden_size],
                                           dtype=tf.float64, initializer=xinit)
                    W_pc = tf.get_variable('W_pc',
                                           shape=[self.config.hidden_size, self.config.hidden_size],
                                           dtype=tf.float64, initializer=xinit)
                    W_px = tf.get_variable('W_px',
                                           shape=[self.config.embed_size, self.config.hidden_size],
                                           dtype=tf.float64, initializer=xinit)
                    hTerm = tf.matmul(h_t[1], W_ph)
                    cTerm = tf.matmul(context, W_pc)
                    xTerm = tf.matmul(x[:, time_step, :], W_px)
                    p_arr = tf.sigmoid(hTerm + cTerm + xTerm)
                    weightedSum = (p_arr * context) + ((1 - p_arr) * h_t[1])
                    hidden = hidden[:-1] + [weightedSum]

    preds = tf.stack(preds, 1)
    hidden = tf.stack(hidden, 1)
    final_preds = tf.boolean_mask(preds, self.mask_placeholder)
    final_hidden = tf.boolean_mask(hidden, self.mask_placeholder)

    if not (self.config.cell == "lstm"):
        W_a = tf.get_variable('W_a',
                              shape=[self.config.hidden_size, self.config.hidden_size],
                              dtype=tf.float64, initializer=xinit)
        W_o = tf.get_variable('W_o', shape=[2 * self.config.hidden_size, output_size],
                              dtype=tf.float64, initializer=xinit)
        W_s = tf.get_variable('W_s', shape=[output_size, output_size],
                              dtype=tf.float64, initializer=xinit)
        b_o = tf.get_variable('b_o', shape=[output_size], dtype=tf.float64,
                              initializer=tf.constant_initializer(0.0))
        b_s = tf.get_variable('b_s', shape=[output_size], dtype=tf.float64,
                              initializer=tf.constant_initializer(0.0))

        ht = tf.reshape(tf.matmul(final_hidden, W_a),
                        (tf.shape(x)[0], -1, self.config.hidden_size))
        weights = tf.reduce_sum(ht * hidden, axis=2) * self.attn_mask_placeholder
        weights = tf.nn.softmax(weights)
        context = tf.reduce_sum(
            tf.reshape(weights, (tf.shape(weights)[0], tf.shape(weights)[1], -1)) * hidden,
            axis=1)
        final_preds = tf.tanh(tf.matmul(tf.concat([context, final_hidden], 1), W_o) + b_o)
        final_preds = tf.matmul(final_preds, W_s) + b_s

    if self.config.terminal_pred:
        nt = tf.nn.embedding_lookup(self.embeddings,
                                    self.next_non_terminal_input_placeholder)
        nt = tf.reshape(nt, [-1, self.config.n_token_features * self.config.embed_size])
        U_nt = tf.get_variable('U_nt', shape=[self.config.hidden_size, output_size],
                               initializer=xinit, dtype=tf.float64)
        b_t = tf.get_variable('b_t', shape=[output_size],
                              initializer=tf.constant_initializer(0.0), dtype=tf.float64)
        final_preds = final_preds + tf.matmul(nt, U_nt) + b_t

    return final_preds
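# A toy NumPy sketch of the gated combination used by the "lstmAwsum_fn"
# branch above: a sigmoid gate p decides, per dimension, how much of the
# attention context versus the current hidden state to keep. Random matrices
# stand in for the learned W_ph, W_pc, W_px.
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

hidden_size, embed_size = 3, 4
h_t = np.random.randn(1, hidden_size)       # current hidden state
context = np.random.randn(1, hidden_size)   # attention context
x_t = np.random.randn(1, embed_size)        # current input embedding

W_ph = np.random.randn(hidden_size, hidden_size)
W_pc = np.random.randn(hidden_size, hidden_size)
W_px = np.random.randn(embed_size, hidden_size)

p = sigmoid(h_t @ W_ph + context @ W_pc + x_t @ W_px)   # per-dimension gate
combined = p * context + (1 - p) * h_t                  # gated mix replaces h_t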
def encode(self, inputs, masks, dropout, scope, lstm_size, encoder_state_input=None):
    """
    In a generalized encode function, you pass in your inputs, masks, and an
    initial hidden state input into this function.

    :param inputs: Symbolic representations of your input
    :param masks: this is to make sure tf.nn.dynamic_rnn doesn't iterate
                  through masked steps
    :param encoder_state_input: (Optional) pass this as initial hidden state
                                to tf.nn.dynamic_rnn to build conditional
                                representations
    :return: an encoded representation of your input.
             It can be context-level representation, word-level representation,
             or both.
    """
    batch_size = tf.shape(inputs)[0]
    passage_length = tf.shape(inputs)[1]
    embedding_size = inputs.get_shape().as_list()[2]

    # LSTM for encoding the question
    lstm = LSTMCell(lstm_size=lstm_size)

    if encoder_state_input is not None:
        state = encoder_state_input
    else:
        h = tf.zeros(shape=[batch_size, lstm_size], dtype=tf.float32)
        c = tf.zeros(shape=[batch_size, lstm_size], dtype=tf.float32)
        state = [h, c]

    with tf.variable_scope(scope):
        input_size = inputs.get_shape()[1]
        encoded = None
        for word_step in range(inputs.get_shape()[1]):
            if word_step >= 1:
                tf.get_variable_scope().reuse_variables()
            # hidden_mask = tf.tile(masks[:, word_step], [1, lstm_size])
            output, state = lstm(inputs[:, word_step], state, scope=scope)
            # output = tf.boolean_mask(output, masks[:, word_step], name='boolean_mask')

            # apply dropout and reshape to (batch_size, 1, lstm_size)
            output = tf.nn.dropout(output, dropout)
            output = tf.reshape(output, [batch_size, 1, lstm_size])

            if word_step == 0:
                encoded = output
            else:
                encoded = tf.concat_v2([encoded, output], 1)

    return (encoded, state)
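# A hedged usage sketch for the encoder above (TF 1.x-era graph code). The
# placeholder names, shapes, sizes, and the `encoder` instance are illustrative
# assumptions; only the encode(...) signature comes from the original snippet.
import tensorflow as tf

embed_size, lstm_size, max_len = 100, 200, 30
question = tf.placeholder(tf.float32, [None, max_len, embed_size])   # embedded tokens
question_mask = tf.placeholder(tf.float32, [None, max_len])
keep_prob = tf.placeholder(tf.float32, [])

encoded_q, final_state = encoder.encode(question, question_mask, keep_prob,
                                        scope="question_encoder",
                                        lstm_size=lstm_size)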